diff --git a/sphinx/ext/napoleon/docstring.py b/sphinx/ext/napoleon/docstring.py
index d9f1eb357dd..84dfc1e70c5 100644
--- a/sphinx/ext/napoleon/docstring.py
+++ b/sphinx/ext/napoleon/docstring.py
@@ -77,6 +77,187 @@ def next(self) -> Any:
         raise StopIteration
 
 
+def _recombine_set_tokens(tokens: list[str]) -> list[str]:
+    token_queue = collections.deque(tokens)
+    keywords = ('optional', 'default')
+
+    def takewhile_set(tokens: collections.deque[str]) -> Iterator[str]:
+        open_braces = 0
+        previous_token = None
+        while True:
+            try:
+                token = tokens.popleft()
+            except IndexError:
+                break
+
+            if token == ', ':
+                previous_token = token
+                continue
+
+            if not token.strip():
+                continue
+
+            if token in keywords:
+                tokens.appendleft(token)
+                if previous_token is not None:
+                    tokens.appendleft(previous_token)
+                break
+
+            if previous_token is not None:
+                yield previous_token
+                previous_token = None
+
+            if token == '{':
+                open_braces += 1
+            elif token == '}':
+                open_braces -= 1
+
+            yield token
+
+            if open_braces == 0:
+                break
+
+    def combine_set(tokens: collections.deque[str]) -> Iterator[str]:
+        while True:
+            try:
+                token = tokens.popleft()
+            except IndexError:
+                break
+
+            if token == '{':
+                tokens.appendleft('{')
+                yield ''.join(takewhile_set(tokens))
+            else:
+                yield token
+
+    return list(combine_set(token_queue))
+
+
+def _tokenize_type_spec(spec: str) -> list[str]:
+    def postprocess(item: str) -> list[str]:
+        if _default_regex.match(item):
+            default = item[:7]
+            # can't be separated by anything other than a single space
+            # for now
+            other = item[8:]
+
+            return [default, ' ', other]
+        else:
+            return [item]
+
+    tokens = [
+        item
+        for raw_token in _token_regex.split(spec)
+        for item in postprocess(raw_token)
+        if item
+    ]
+    return tokens
+
+
+def _token_type(token: str, location: str | None = None) -> str:
+    def is_numeric(token: str) -> bool:
+        try:
+            # use complex to make sure every numeric value is detected as literal
+            complex(token)
+        except ValueError:
+            return False
+        else:
+            return True
+
+    if token.startswith(' ') or token.endswith(' '):
+        type_ = 'delimiter'
+    elif (
+        is_numeric(token)
+        or (token.startswith('{') and token.endswith('}'))
+        or (token.startswith('"') and token.endswith('"'))
+        or (token.startswith("'") and token.endswith("'"))
+    ):
+        type_ = 'literal'
+    elif token.startswith('{'):
+        logger.warning(
+            __('invalid value set (missing closing brace): %s'),
+            token,
+            location=location,
+        )
+        type_ = 'literal'
+    elif token.endswith('}'):
+        logger.warning(
+            __('invalid value set (missing opening brace): %s'),
+            token,
+            location=location,
+        )
+        type_ = 'literal'
+    elif token.startswith(("'", '"')):
+        logger.warning(
+            __('malformed string literal (missing closing quote): %s'),
+            token,
+            location=location,
+        )
+        type_ = 'literal'
+    elif token.endswith(("'", '"')):
+        logger.warning(
+            __('malformed string literal (missing opening quote): %s'),
+            token,
+            location=location,
+        )
+        type_ = 'literal'
+    elif token in {'optional', 'default'}:
+        # default is not a official keyword (yet) but supported by the
+        # reference implementation (numpydoc) and widely used
+        type_ = 'control'
+    elif _xref_regex.match(token):
+        type_ = 'reference'
+    else:
+        type_ = 'obj'
+
+    return type_
+
+
+def _convert_numpy_type_spec(
+    _type: str,
+    location: str | None = None,
+    translations: dict[str, str] | None = None,
+) -> str:
+    if translations is None:
+        translations = {}
+
+    def convert_obj(
+        obj: str, translations: dict[str, str], default_translation: str
+    ) -> str:
+        translation = translations.get(obj, obj)
+
+        # use :class: (the default) only if obj is not a standard singleton
+        if translation in _SINGLETONS and default_translation == ':class:`%s`':
+            default_translation = ':obj:`%s`'
+        elif translation == '...' and default_translation == ':class:`%s`':
+            # allow referencing the builtin ...
+            default_translation = ':obj:`%s <Ellipsis>`'
+
+        if _xref_regex.match(translation) is None:
+            translation = default_translation % translation
+
+        return translation
+
+    tokens = _tokenize_type_spec(_type)
+    combined_tokens = _recombine_set_tokens(tokens)
+    types = [(token, _token_type(token, location)) for token in combined_tokens]
+
+    converters = {
+        'literal': lambda x: '``%s``' % x,
+        'obj': lambda x: convert_obj(x, translations, ':class:`%s`'),
+        'control': lambda x: '*%s*' % x,
+        'delimiter': lambda x: x,
+        'reference': lambda x: x,
+    }
+
+    converted = ''.join(
+        converters.get(type_)(token)  # type: ignore[misc]
+        for token, type_ in types
+    )
+
+    return converted
+
+
 def _convert_type_spec(_type: str, translations: dict[str, str] | None = None) -> str:
     """Convert type specification to reference in reST."""
     if translations is not None and _type in translations:
@@ -914,187 +1095,6 @@ def _lookup_annotation(self, _name: str) -> str:
         return ''
 
 
-def _recombine_set_tokens(tokens: list[str]) -> list[str]:
-    token_queue = collections.deque(tokens)
-    keywords = ('optional', 'default')
-
-    def takewhile_set(tokens: collections.deque[str]) -> Iterator[str]:
-        open_braces = 0
-        previous_token = None
-        while True:
-            try:
-                token = tokens.popleft()
-            except IndexError:
-                break
-
-            if token == ', ':
-                previous_token = token
-                continue
-
-            if not token.strip():
-                continue
-
-            if token in keywords:
-                tokens.appendleft(token)
-                if previous_token is not None:
-                    tokens.appendleft(previous_token)
-                break
-
-            if previous_token is not None:
-                yield previous_token
-                previous_token = None
-
-            if token == '{':
-                open_braces += 1
-            elif token == '}':
-                open_braces -= 1
-
-            yield token
-
-            if open_braces == 0:
-                break
-
-    def combine_set(tokens: collections.deque[str]) -> Iterator[str]:
-        while True:
-            try:
-                token = tokens.popleft()
-            except IndexError:
-                break
-
-            if token == '{':
-                tokens.appendleft('{')
-                yield ''.join(takewhile_set(tokens))
-            else:
-                yield token
-
-    return list(combine_set(token_queue))
-
-
-def _tokenize_type_spec(spec: str) -> list[str]:
-    def postprocess(item: str) -> list[str]:
-        if _default_regex.match(item):
-            default = item[:7]
-            # can't be separated by anything other than a single space
-            # for now
-            other = item[8:]
-
-            return [default, ' ', other]
-        else:
-            return [item]
-
-    tokens = [
-        item
-        for raw_token in _token_regex.split(spec)
-        for item in postprocess(raw_token)
-        if item
-    ]
-    return tokens
-
-
-def _token_type(token: str, location: str | None = None) -> str:
-    def is_numeric(token: str) -> bool:
-        try:
-            # use complex to make sure every numeric value is detected as literal
-            complex(token)
-        except ValueError:
-            return False
-        else:
-            return True
-
-    if token.startswith(' ') or token.endswith(' '):
-        type_ = 'delimiter'
-    elif (
-        is_numeric(token)
-        or (token.startswith('{') and token.endswith('}'))
-        or (token.startswith('"') and token.endswith('"'))
-        or (token.startswith("'") and token.endswith("'"))
-    ):
-        type_ = 'literal'
-    elif token.startswith('{'):
-        logger.warning(
-            __('invalid value set (missing closing brace): %s'),
-            token,
-            location=location,
-        )
-        type_ = 'literal'
-    elif token.endswith('}'):
-        logger.warning(
-            __('invalid value set (missing opening brace): %s'),
-            token,
-            location=location,
-        )
-        type_ = 'literal'
-    elif token.startswith(("'", '"')):
-        logger.warning(
-            __('malformed string literal (missing closing quote): %s'),
-            token,
-            location=location,
-        )
-        type_ = 'literal'
-    elif token.endswith(("'", '"')):
-        logger.warning(
-            __('malformed string literal (missing opening quote): %s'),
-            token,
-            location=location,
-        )
-        type_ = 'literal'
-    elif token in {'optional', 'default'}:
-        # default is not a official keyword (yet) but supported by the
-        # reference implementation (numpydoc) and widely used
-        type_ = 'control'
-    elif _xref_regex.match(token):
-        type_ = 'reference'
-    else:
-        type_ = 'obj'
-
-    return type_
-
-
-def _convert_numpy_type_spec(
-    _type: str,
-    location: str | None = None,
-    translations: dict[str, str] | None = None,
-) -> str:
-    if translations is None:
-        translations = {}
-
-    def convert_obj(
-        obj: str, translations: dict[str, str], default_translation: str
-    ) -> str:
-        translation = translations.get(obj, obj)
-
-        # use :class: (the default) only if obj is not a standard singleton
-        if translation in _SINGLETONS and default_translation == ':class:`%s`':
-            default_translation = ':obj:`%s`'
-        elif translation == '...' and default_translation == ':class:`%s`':
-            # allow referencing the builtin ...
-            default_translation = ':obj:`%s <Ellipsis>`'
-
-        if _xref_regex.match(translation) is None:
-            translation = default_translation % translation
-
-        return translation
-
-    tokens = _tokenize_type_spec(_type)
-    combined_tokens = _recombine_set_tokens(tokens)
-    types = [(token, _token_type(token, location)) for token in combined_tokens]
-
-    converters = {
-        'literal': lambda x: '``%s``' % x,
-        'obj': lambda x: convert_obj(x, translations, ':class:`%s`'),
-        'control': lambda x: '*%s*' % x,
-        'delimiter': lambda x: x,
-        'reference': lambda x: x,
-    }
-
-    converted = ''.join(
-        converters.get(type_)(token)  # type: ignore[misc]
-        for token, type_ in types
-    )
-
-    return converted
-
-
 class NumpyDocstring(GoogleDocstring):
     """Convert NumPy style docstrings to reStructuredText.
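For context, the snippet below is an illustrative sketch, not part of the patch. It mirrors the converters table inside _convert_numpy_type_spec() to show how each token category is rendered as reST. The sample tokens and the simplified 'obj' converter (which skips the translations/_SINGLETONS handling done by convert_obj()) are assumptions for demonstration only.

# Sketch: how classified tokens map to reST markup, following the
# converters dict added/moved by this diff.
converters = {
    'literal': lambda x: '``%s``' % x,    # e.g. {'C', 'F'}, 2, "auto"
    'obj': lambda x: ':class:`%s`' % x,   # simplified stand-in for convert_obj()
    'control': lambda x: '*%s*' % x,      # the keywords 'optional' and 'default'
    'delimiter': lambda x: x,             # spacing tokens such as ', '
    'reference': lambda x: x,             # already-formed :role:`target` xrefs
}

# (token, category) pairs, roughly what _tokenize_type_spec,
# _recombine_set_tokens and _token_type would produce for "int, optional"
tokens = [('int', 'obj'), (', ', 'delimiter'), ('optional', 'control')]
print(''.join(converters[category](token) for token, category in tokens))
# -> :class:`int`, *optional*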