Napoleon: Move the type preprocessing code
This puts all preprocessing code above both docstring classes, rather
than in between. This is in preparation for making both docstring classes
share the same preprocessing.
cbarrick committed Nov 24, 2024
1 parent b6b01c2 commit bb27724
Showing 1 changed file with 181 additions and 181 deletions.
362 changes: 181 additions & 181 deletions sphinx/ext/napoleon/docstring.py
@@ -77,6 +77,187 @@ def next(self) -> Any:
raise StopIteration


def _recombine_set_tokens(tokens: list[str]) -> list[str]:
token_queue = collections.deque(tokens)
keywords = ('optional', 'default')

def takewhile_set(tokens: collections.deque[str]) -> Iterator[str]:
open_braces = 0
previous_token = None
while True:
try:
token = tokens.popleft()
except IndexError:
break

if token == ', ':
previous_token = token
continue

if not token.strip():
continue

if token in keywords:
tokens.appendleft(token)
if previous_token is not None:
tokens.appendleft(previous_token)
break

if previous_token is not None:
yield previous_token
previous_token = None

if token == '{':
open_braces += 1
elif token == '}':
open_braces -= 1

yield token

if open_braces == 0:
break

def combine_set(tokens: collections.deque[str]) -> Iterator[str]:
while True:
try:
token = tokens.popleft()
except IndexError:
break

if token == '{':
tokens.appendleft('{')
yield ''.join(takewhile_set(tokens))
else:
yield token

return list(combine_set(token_queue))
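
For reference, a doctest-style sketch of this helper's behavior (illustrative only; the input assumes tokens of the shape produced by _tokenize_type_spec below):

>>> _recombine_set_tokens(['{', '1', ', ', '2', '}', ', ', 'optional'])
['{1, 2}', ', ', 'optional']

The keyword check pushes 'optional' and 'default' back onto the queue so they are never swallowed into an unclosed set literal.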


def _tokenize_type_spec(spec: str) -> list[str]:
def postprocess(item: str) -> list[str]:
if _default_regex.match(item):
default = item[:7]
# can't be separated by anything other than a single space
# for now
other = item[8:]

return [default, ' ', other]
else:
return [item]

tokens = [
item
for raw_token in _token_regex.split(spec)
for item in postprocess(raw_token)
if item
]
return tokens
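
An illustrative sketch, assuming the module-level _token_regex (defined earlier in this file) splits on delimiters such as ' or ' and ', ':

>>> _tokenize_type_spec('int or float or None, optional')
['int', ' or ', 'float', ' or ', 'None', ', ', 'optional']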


def _token_type(token: str, location: str | None = None) -> str:
def is_numeric(token: str) -> bool:
try:
# use complex to make sure every numeric value is detected as literal
complex(token)
except ValueError:
return False
else:
return True

if token.startswith(' ') or token.endswith(' '):
type_ = 'delimiter'
elif (
is_numeric(token)
or (token.startswith('{') and token.endswith('}'))
or (token.startswith('"') and token.endswith('"'))
or (token.startswith("'") and token.endswith("'"))
):
type_ = 'literal'
elif token.startswith('{'):
logger.warning(
__('invalid value set (missing closing brace): %s'),
token,
location=location,
)
type_ = 'literal'
elif token.endswith('}'):
logger.warning(
__('invalid value set (missing opening brace): %s'),
token,
location=location,
)
type_ = 'literal'
elif token.startswith(("'", '"')):
logger.warning(
__('malformed string literal (missing closing quote): %s'),
token,
location=location,
)
type_ = 'literal'
elif token.endswith(("'", '"')):
logger.warning(
__('malformed string literal (missing opening quote): %s'),
token,
location=location,
)
type_ = 'literal'
elif token in {'optional', 'default'}:
        # default is not an official keyword (yet) but supported by the
# reference implementation (numpydoc) and widely used
type_ = 'control'
elif _xref_regex.match(token):
type_ = 'reference'
else:
type_ = 'obj'

return type_
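
A few illustrative classifications (doctest-style sketch, derived from the branches above):

>>> _token_type('1.5')
'literal'
>>> _token_type("{'F', 'C'}")
'literal'
>>> _token_type(' or ')
'delimiter'
>>> _token_type('optional')
'control'
>>> _token_type('str')
'obj'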


def _convert_numpy_type_spec(
_type: str,
location: str | None = None,
translations: dict[str, str] | None = None,
) -> str:
if translations is None:
translations = {}

def convert_obj(
obj: str, translations: dict[str, str], default_translation: str
) -> str:
translation = translations.get(obj, obj)

# use :class: (the default) only if obj is not a standard singleton
if translation in _SINGLETONS and default_translation == ':class:`%s`':
default_translation = ':obj:`%s`'
elif translation == '...' and default_translation == ':class:`%s`':
# allow referencing the builtin ...
default_translation = ':obj:`%s <Ellipsis>`'

if _xref_regex.match(translation) is None:
translation = default_translation % translation

return translation

tokens = _tokenize_type_spec(_type)
combined_tokens = _recombine_set_tokens(tokens)
types = [(token, _token_type(token, location)) for token in combined_tokens]

converters = {
'literal': lambda x: '``%s``' % x,
'obj': lambda x: convert_obj(x, translations, ':class:`%s`'),
'control': lambda x: '*%s*' % x,
'delimiter': lambda x: x,
'reference': lambda x: x,
}

converted = ''.join(
converters.get(type_)(token) # type: ignore[misc]
for token, type_ in types
)

return converted
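
End to end, the pipeline tokenizes, recombines set literals, classifies each token, and formats it. An illustrative sketch (exact output assumes the module-level regexes defined earlier in this file):

>>> _convert_numpy_type_spec('int or float, optional')
':class:`int` or :class:`float`, *optional*'
>>> _convert_numpy_type_spec('array_like', translations={'array_like': ':term:`array_like`'})
':term:`array_like`'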


def _convert_type_spec(_type: str, translations: dict[str, str] | None = None) -> str:
"""Convert type specification to reference in reST."""
if translations is not None and _type in translations:
@@ -914,187 +1095,6 @@ def _lookup_annotation(self, _name: str) -> str:
return ''


def _recombine_set_tokens(tokens: list[str]) -> list[str]:
token_queue = collections.deque(tokens)
keywords = ('optional', 'default')

def takewhile_set(tokens: collections.deque[str]) -> Iterator[str]:
open_braces = 0
previous_token = None
while True:
try:
token = tokens.popleft()
except IndexError:
break

if token == ', ':
previous_token = token
continue

if not token.strip():
continue

if token in keywords:
tokens.appendleft(token)
if previous_token is not None:
tokens.appendleft(previous_token)
break

if previous_token is not None:
yield previous_token
previous_token = None

if token == '{':
open_braces += 1
elif token == '}':
open_braces -= 1

yield token

if open_braces == 0:
break

def combine_set(tokens: collections.deque[str]) -> Iterator[str]:
while True:
try:
token = tokens.popleft()
except IndexError:
break

if token == '{':
tokens.appendleft('{')
yield ''.join(takewhile_set(tokens))
else:
yield token

return list(combine_set(token_queue))


def _tokenize_type_spec(spec: str) -> list[str]:
def postprocess(item: str) -> list[str]:
if _default_regex.match(item):
default = item[:7]
# can't be separated by anything other than a single space
# for now
other = item[8:]

return [default, ' ', other]
else:
return [item]

tokens = [
item
for raw_token in _token_regex.split(spec)
for item in postprocess(raw_token)
if item
]
return tokens


def _token_type(token: str, location: str | None = None) -> str:
def is_numeric(token: str) -> bool:
try:
# use complex to make sure every numeric value is detected as literal
complex(token)
except ValueError:
return False
else:
return True

if token.startswith(' ') or token.endswith(' '):
type_ = 'delimiter'
elif (
is_numeric(token)
or (token.startswith('{') and token.endswith('}'))
or (token.startswith('"') and token.endswith('"'))
or (token.startswith("'") and token.endswith("'"))
):
type_ = 'literal'
elif token.startswith('{'):
logger.warning(
__('invalid value set (missing closing brace): %s'),
token,
location=location,
)
type_ = 'literal'
elif token.endswith('}'):
logger.warning(
__('invalid value set (missing opening brace): %s'),
token,
location=location,
)
type_ = 'literal'
elif token.startswith(("'", '"')):
logger.warning(
__('malformed string literal (missing closing quote): %s'),
token,
location=location,
)
type_ = 'literal'
elif token.endswith(("'", '"')):
logger.warning(
__('malformed string literal (missing opening quote): %s'),
token,
location=location,
)
type_ = 'literal'
elif token in {'optional', 'default'}:
        # default is not an official keyword (yet) but supported by the
# reference implementation (numpydoc) and widely used
type_ = 'control'
elif _xref_regex.match(token):
type_ = 'reference'
else:
type_ = 'obj'

return type_


def _convert_numpy_type_spec(
_type: str,
location: str | None = None,
translations: dict[str, str] | None = None,
) -> str:
if translations is None:
translations = {}

def convert_obj(
obj: str, translations: dict[str, str], default_translation: str
) -> str:
translation = translations.get(obj, obj)

# use :class: (the default) only if obj is not a standard singleton
if translation in _SINGLETONS and default_translation == ':class:`%s`':
default_translation = ':obj:`%s`'
elif translation == '...' and default_translation == ':class:`%s`':
# allow referencing the builtin ...
default_translation = ':obj:`%s <Ellipsis>`'

if _xref_regex.match(translation) is None:
translation = default_translation % translation

return translation

tokens = _tokenize_type_spec(_type)
combined_tokens = _recombine_set_tokens(tokens)
types = [(token, _token_type(token, location)) for token in combined_tokens]

converters = {
'literal': lambda x: '``%s``' % x,
'obj': lambda x: convert_obj(x, translations, ':class:`%s`'),
'control': lambda x: '*%s*' % x,
'delimiter': lambda x: x,
'reference': lambda x: x,
}

converted = ''.join(
converters.get(type_)(token) # type: ignore[misc]
for token, type_ in types
)

return converted


class NumpyDocstring(GoogleDocstring):
"""Convert NumPy style docstrings to reStructuredText.
