Skip to content

Commit

Permalink
Napoleon: Move the type preprocessing code
Browse files Browse the repository at this point in the history
This puts all preprocessing code above both docstring classes, rather
than in between. This is in preparation for making both docstring classes
share the same preprocessing.
  • Loading branch information
cbarrick committed Dec 21, 2024
1 parent df3d94f commit 0a5f819
Showing 1 changed file with 181 additions and 181 deletions.
362 changes: 181 additions & 181 deletions sphinx/ext/napoleon/docstring.py
Original file line number Diff line number Diff line change
Expand Up @@ -77,6 +77,187 @@ def next(self) -> Any:
raise StopIteration


def _recombine_set_tokens(tokens: list[str]) -> list[str]:
token_queue = collections.deque(tokens)
keywords = ('optional', 'default')

def takewhile_set(tokens: collections.deque[str]) -> Iterator[str]:
open_braces = 0
previous_token = None
while True:
try:
token = tokens.popleft()
except IndexError:
break

if token == ', ':
previous_token = token
continue

if not token.strip():
continue

if token in keywords:
tokens.appendleft(token)
if previous_token is not None:
tokens.appendleft(previous_token)
break

if previous_token is not None:
yield previous_token
previous_token = None

if token == '{':
open_braces += 1
elif token == '}':
open_braces -= 1

yield token

if open_braces == 0:
break

def combine_set(tokens: collections.deque[str]) -> Iterator[str]:
while True:
try:
token = tokens.popleft()
except IndexError:
break

if token == '{':
tokens.appendleft('{')
yield ''.join(takewhile_set(tokens))
else:
yield token

return list(combine_set(token_queue))


def _tokenize_type_spec(spec: str) -> list[str]:
    """Split a type specification string into a flat list of tokens.

    ``spec`` is split with the module-level ``_token_regex``. Every
    resulting piece that matches the module-level ``_default_regex`` is
    further split into its first seven characters, a single space and
    everything from index eight onwards. Empty pieces are discarded.
    """

    def postprocess(item: str) -> list[str]:
        if not _default_regex.match(item):
            return [item]

        # the leading word and its value can't be separated by anything
        # other than a single space for now
        keyword, remainder = item[:7], item[8:]
        return [keyword, ' ', remainder]

    tokens: list[str] = []
    for raw_token in _token_regex.split(spec):
        tokens.extend(piece for piece in postprocess(raw_token) if piece)
    return tokens


def _token_type(token: str, location: str | None = None) -> str:
def is_numeric(token: str) -> bool:
try:
# use complex to make sure every numeric value is detected as literal
complex(token)
except ValueError:
return False
else:
return True

if token.startswith(' ') or token.endswith(' '):
type_ = 'delimiter'
elif (
is_numeric(token)
or (token.startswith('{') and token.endswith('}'))
or (token.startswith('"') and token.endswith('"'))
or (token.startswith("'") and token.endswith("'"))
):
type_ = 'literal'
elif token.startswith('{'):
logger.warning(
__('invalid value set (missing closing brace): %s'),
token,
location=location,
)
type_ = 'literal'
elif token.endswith('}'):
logger.warning(
__('invalid value set (missing opening brace): %s'),
token,
location=location,
)
type_ = 'literal'
elif token.startswith(("'", '"')):
logger.warning(
__('malformed string literal (missing closing quote): %s'),
token,
location=location,
)
type_ = 'literal'
elif token.endswith(("'", '"')):
logger.warning(
__('malformed string literal (missing opening quote): %s'),
token,
location=location,
)
type_ = 'literal'
elif token in {'optional', 'default'}:
# default is not a official keyword (yet) but supported by the
# reference implementation (numpydoc) and widely used
type_ = 'control'
elif _xref_regex.match(token):
type_ = 'reference'
else:
type_ = 'obj'

return type_


def _convert_numpy_type_spec(
    _type: str,
    location: str | None = None,
    translations: dict[str, str] | None = None,
) -> str:
    """Convert a NumPy-style type specification into reST markup.

    The specification is tokenized, value sets are recombined, every token
    is classified with ``_token_type`` and then rendered according to its
    class: literals become inline literals, control words are emphasised,
    delimiters and existing references are kept verbatim, and everything
    else is turned into a cross-reference (after applying
    ``translations``). The rendered tokens are concatenated and returned.

    ``location`` is only forwarded to ``_token_type``.
    """
    if translations is None:
        translations = {}

    def convert_obj(
        obj: str, translations: dict[str, str], default_translation: str
    ) -> str:
        target = translations.get(obj, obj)

        # use :class: (the default) only if obj is not a standard singleton
        if default_translation == ':class:`%s`':
            if target in _SINGLETONS:
                default_translation = ':obj:`%s`'
            elif target == '...':
                # allow referencing the builtin ...
                default_translation = ':obj:`%s <Ellipsis>`'

        # a translation that already is a cross-reference is used as is
        if _xref_regex.match(target) is not None:
            return target
        return default_translation % target

    converters = {
        'literal': lambda x: '``%s``' % x,
        'obj': lambda x: convert_obj(x, translations, ':class:`%s`'),
        'control': lambda x: '*%s*' % x,
        'delimiter': lambda x: x,
        'reference': lambda x: x,
    }

    combined_tokens = _recombine_set_tokens(_tokenize_type_spec(_type))

    rendered: list[str] = []
    for token in combined_tokens:
        kind = _token_type(token, location)
        rendered.append(converters.get(kind)(token))  # type: ignore[misc]

    return ''.join(rendered)


def _convert_type_spec(_type: str, translations: dict[str, str] | None = None) -> str:
"""Convert type specification to reference in reST."""
if translations is not None and _type in translations:
Expand Down Expand Up @@ -914,187 +1095,6 @@ def _lookup_annotation(self, _name: str) -> str:
return ''


def _recombine_set_tokens(tokens: list[str]) -> list[str]:
token_queue = collections.deque(tokens)
keywords = ('optional', 'default')

def takewhile_set(tokens: collections.deque[str]) -> Iterator[str]:
open_braces = 0
previous_token = None
while True:
try:
token = tokens.popleft()
except IndexError:
break

if token == ', ':
previous_token = token
continue

if not token.strip():
continue

if token in keywords:
tokens.appendleft(token)
if previous_token is not None:
tokens.appendleft(previous_token)
break

if previous_token is not None:
yield previous_token
previous_token = None

if token == '{':
open_braces += 1
elif token == '}':
open_braces -= 1

yield token

if open_braces == 0:
break

def combine_set(tokens: collections.deque[str]) -> Iterator[str]:
while True:
try:
token = tokens.popleft()
except IndexError:
break

if token == '{':
tokens.appendleft('{')
yield ''.join(takewhile_set(tokens))
else:
yield token

return list(combine_set(token_queue))


def _tokenize_type_spec(spec: str) -> list[str]:
    """Split a type specification string into a flat list of tokens.

    ``spec`` is split with the module-level ``_token_regex``. Every
    resulting piece that matches the module-level ``_default_regex`` is
    further split into its first seven characters, a single space and
    everything from index eight onwards. Empty pieces are discarded.
    """

    def postprocess(item: str) -> list[str]:
        if not _default_regex.match(item):
            return [item]

        # the leading word and its value can't be separated by anything
        # other than a single space for now
        keyword, remainder = item[:7], item[8:]
        return [keyword, ' ', remainder]

    tokens: list[str] = []
    for raw_token in _token_regex.split(spec):
        tokens.extend(piece for piece in postprocess(raw_token) if piece)
    return tokens


def _token_type(token: str, location: str | None = None) -> str:
def is_numeric(token: str) -> bool:
try:
# use complex to make sure every numeric value is detected as literal
complex(token)
except ValueError:
return False
else:
return True

if token.startswith(' ') or token.endswith(' '):
type_ = 'delimiter'
elif (
is_numeric(token)
or (token.startswith('{') and token.endswith('}'))
or (token.startswith('"') and token.endswith('"'))
or (token.startswith("'") and token.endswith("'"))
):
type_ = 'literal'
elif token.startswith('{'):
logger.warning(
__('invalid value set (missing closing brace): %s'),
token,
location=location,
)
type_ = 'literal'
elif token.endswith('}'):
logger.warning(
__('invalid value set (missing opening brace): %s'),
token,
location=location,
)
type_ = 'literal'
elif token.startswith(("'", '"')):
logger.warning(
__('malformed string literal (missing closing quote): %s'),
token,
location=location,
)
type_ = 'literal'
elif token.endswith(("'", '"')):
logger.warning(
__('malformed string literal (missing opening quote): %s'),
token,
location=location,
)
type_ = 'literal'
elif token in {'optional', 'default'}:
# default is not a official keyword (yet) but supported by the
# reference implementation (numpydoc) and widely used
type_ = 'control'
elif _xref_regex.match(token):
type_ = 'reference'
else:
type_ = 'obj'

return type_


def _convert_numpy_type_spec(
    _type: str,
    location: str | None = None,
    translations: dict[str, str] | None = None,
) -> str:
    """Convert a NumPy-style type specification into reST markup.

    The specification is tokenized, value sets are recombined, every token
    is classified with ``_token_type`` and then rendered according to its
    class: literals become inline literals, control words are emphasised,
    delimiters and existing references are kept verbatim, and everything
    else is turned into a cross-reference (after applying
    ``translations``). The rendered tokens are concatenated and returned.

    ``location`` is only forwarded to ``_token_type``.
    """
    if translations is None:
        translations = {}

    def convert_obj(
        obj: str, translations: dict[str, str], default_translation: str
    ) -> str:
        target = translations.get(obj, obj)

        # use :class: (the default) only if obj is not a standard singleton
        if default_translation == ':class:`%s`':
            if target in _SINGLETONS:
                default_translation = ':obj:`%s`'
            elif target == '...':
                # allow referencing the builtin ...
                default_translation = ':obj:`%s <Ellipsis>`'

        # a translation that already is a cross-reference is used as is
        if _xref_regex.match(target) is not None:
            return target
        return default_translation % target

    converters = {
        'literal': lambda x: '``%s``' % x,
        'obj': lambda x: convert_obj(x, translations, ':class:`%s`'),
        'control': lambda x: '*%s*' % x,
        'delimiter': lambda x: x,
        'reference': lambda x: x,
    }

    combined_tokens = _recombine_set_tokens(_tokenize_type_spec(_type))

    rendered: list[str] = []
    for token in combined_tokens:
        kind = _token_type(token, location)
        rendered.append(converters.get(kind)(token))  # type: ignore[misc]

    return ''.join(rendered)


class NumpyDocstring(GoogleDocstring):
"""Convert NumPy style docstrings to reStructuredText.
Expand Down

0 comments on commit 0a5f819

Please sign in to comment.