diff --git a/src/main.py b/src/main.py index b158c83..ebd76df 100644 --- a/src/main.py +++ b/src/main.py @@ -31,11 +31,9 @@ from typing import Generator, Union, Dict, Tuple, Set from .log import log -from .assets import assets from .parse import * from .render import * from .prerender import Prerenderer -from .reference import ManualRef try: # version.py is generated at build time, so we are running from the proper @@ -51,21 +49,6 @@ # which case the module name will be inferred. BasePathsType = Dict[Union[Tuple[str, ...], None], Set[str]] -# Files from the assets directory to be copied -ASSETS = [ - 'luadox.css', - 'prism.css', - 'prism.js', - 'js-search.min.js', - 'search.js', - 'img/i-left.svg', - 'img/i-right.svg', - 'img/i-download.svg', - 'img/i-github.svg', - 'img/i-gitlab.svg', - 'img/i-bitbucket.svg', -] - class FullHelpParser(argparse.ArgumentParser): def error(self, message: str) -> None: sys.stderr.write('error: %s\n' % message) @@ -265,43 +248,9 @@ def main(): try: log.info('prerendering %d pages', len(parser.topsyms)) toprefs = Prerenderer(parser).process() - - for ref in toprefs: - if ref.userdata.get('empty') and ref.implicit: - # Reference has no content and it was also implicitly generated, so we don't render it. 
- log.info('not rendering empty %s %s', ref.type, ref.name) - continue - if isinstance(ref, ManualRef) and ref.name == 'index': - typedir = outdir - else: - typedir = os.path.join(outdir, ref.type) - os.makedirs(typedir, exist_ok=True) - outfile = os.path.join(typedir, ref.name + '.html') - log.info('rendering %s %s -> %s', ref.type, ref.name, outfile) - html = renderer.render(ref) - with open(outfile, 'w', encoding='utf8') as f: - f.write(html) - - js = renderer.render_search_index() - with open(os.path.join(outdir, 'index.js'), 'w', encoding='utf8') as f: - f.write(js) - - html = renderer.render_search_page() - with open(os.path.join(outdir, 'search.html'), 'w', encoding='utf8') as f: - f.write(html) - - if not parser.get_reference(ManualRef, 'index'): - # The user hasn't specified an index manual page, so we generate a blank - # landing page that at least presents the sidebar with available links. - html = renderer.render_landing_page() - with open(os.path.join(outdir, 'index.html'), 'w', encoding='utf8') as f: - f.write(html) - - for name in ASSETS: - outfile = os.path.join(outdir, name) - if os.path.dirname(name): - os.makedirs(os.path.dirname(outfile), exist_ok=True) - with open(outfile, 'wb') as f: - f.write(assets.get(name)) + renderer.render(toprefs, outdir) except Exception as e: log.exception('unhandled error rendering around %s:%s: %s', parser.ctx.file, parser.ctx.line, e) + sys.exit(1) + + log.info('done') diff --git a/src/parse.py b/src/parse.py index a32208a..29d8863 100644 --- a/src/parse.py +++ b/src/parse.py @@ -18,7 +18,7 @@ import re from collections import OrderedDict from configparser import ConfigParser -from typing import IO, Optional, Union, Tuple, List, Dict, Type +from typing import IO, Optional, Union, Tuple, List, Dict, Type, Match from .log import log from .reference import * @@ -63,6 +63,14 @@ def update(self, file: Union[str, None, Sentinel]=UNDEF, self.line = line +class ReferenceDict(dict): + """ + Dictionary keyed by ref type 
whose value is a list of references of that same type. + + This is a simple pattern to improve type clarity. + """ + def __getitem__(self, k: Type[RefT]) -> List[RefT]: + return super().__getitem__(k) class ParseError(ValueError): pass @@ -79,7 +87,7 @@ class Parser: def __init__(self, config: ConfigParser) -> None: self.config = config # A complete list of all Reference objects keyed by Reference subclass type - self.parsed: dict[Type[Reference], list[Reference]] = { + self.parsed = ReferenceDict({ ModuleRef: [], ClassRef: [], FunctionRef: [], @@ -87,7 +95,7 @@ def __init__(self, config: ConfigParser) -> None: SectionRef: [], TableRef: [], ManualRef: [], - } + }) # A dict of only top-level References ("toprefs"), keyed by the fully qualified # name of the reference. # @@ -109,6 +117,8 @@ def __init__(self, config: ConfigParser) -> None: # # name -> Reference self.refs: dict[str, Reference] = {} + # Maps refs by their ids, rather than names + self.refs_by_id: dict[str, Reference] = {} # This holds the context of the current file and reference being processed self.ctx = Context() @@ -148,10 +158,10 @@ def _parse_function(self, line: str) -> ParseFuncResult: found. 
""" # Form: function foo(bar, baz) - m = re.search(r'''\bfunction *([^\s(]+) *\(([^)]*)(\))?''', line) + m = recache(r'''\bfunction *([^\s(]+) *\(([^)]*)(\))?''').search(line) if not m: # Look for form: foo = function(bar, baz) - m = re.search(r'''(\S+) *= *function *\(([^)]*)(\))?''', line) + m = recache(r'''(\S+) *= *function *\(([^)]*)(\))?''').search(line) if not m: # Not a function (or not one we could recognize at least) return None, None @@ -163,7 +173,7 @@ def _parse_function(self, line: str) -> ParseFuncResult: if nextline is None: log.error('%s:%s: function definition is truncated', self.ctx.file, n) return None, None - m = re.search(r'''([^)]*)(\))?''', nextline) + m = recache(r'''([^)]*)(\))?''').search(nextline) if m: argstr, terminated = m.groups() arguments.extend([arg.strip() for arg in argstr.replace(' ', '').split(',') if arg.strip()]) @@ -179,10 +189,10 @@ def _parse_field(self, line: str) -> ParseFuncResult: but the second return value is always None. """ # Fields in the form [foo] = bar - m = re.search(r'''\[([^]]+)\] *=''', line) + m = recache(r'''\[([^]]+)\] *=''').search(line) if m: - return re.sub(r'''['"]''', '', m.group(1)), None - m = re.search(r'''\b([\S\.]+) *=''', line) + return recache(r'''['"]''').sub('', m.group(1)), None + m = recache(r'''\b([\S\.]+) *=''').search(line) if m: return m.group(1), None else: @@ -267,6 +277,7 @@ def _add_reference(self, ref: Reference, modref: Optional[Reference]=None) -> No ref.file, ref.line, ref.type, ref.name, conflict.type, conflict.file, conflict.line) else: self.refs[ref.name] = ref + self.refs_by_id[ref.id] = ref def _check_disconnected_reference(self, ref: Union[Reference, None]) -> bool: """ @@ -278,7 +289,7 @@ def _check_disconnected_reference(self, ref: Union[Reference, None]) -> bool: return True # Potentially disconnected comment stanza here, but let's first check to see if there's # any text in the comments, otherwise a blank --- would warn somewhat pointlessly. 
- content = ''.join(line.lstrip('-').strip() for (_, line) in ref.content) + content = ''.join(line.lstrip('-').strip() for (_, line) in ref.raw_content) if content: log.warning('%s:%s: comment block is not connected with any section, ignoring', ref.file, ref.line) return False @@ -331,8 +342,8 @@ def parse_source(self, f: IO[str]) -> List[str]: # Reference to the current collection, defaulting to implicit module ref collection = modref self.ctx.update(file=path) - re_start_comment_block = re.compile(r'^(---[^-]|---+$)') - re_require = re.compile(r'''\brequire\b *\(?['"]([^'"]+)['"]''') + re_start_comment_block = recache(r'^(---[^-]|---+$)') + re_require = recache(r'''\brequire\b *\(?['"]([^'"]+)['"]''') while True: n, line = self._next_line(strip=False) if n is None or line is None: @@ -396,7 +407,7 @@ def parse_source(self, f: IO[str]) -> List[str]: self.refs, file=path, line=n, scopes=scopes[:], symbol=args[0], collection=collection ) - field.content.append((n, ' '.join(args[1:]))) + field.raw_content.append((n, ' '.join(args[1:]))) self._add_reference(field, modref) elif tag == 'alias': if not args: @@ -430,7 +441,7 @@ def parse_source(self, f: IO[str]) -> List[str]: raise ParseError(f'@{tag} is missing argment') # Nothing special is otherwise needed here. else: - ref.content.append((n, line)) + ref.raw_content.append((n, line)) else: # This line doesn't start with a comment, but may have one at the end # which we remove here. @@ -555,23 +566,19 @@ def parse_manual(self, name: str, f: IO[str]) -> None: # Only h1, h2, and h3 create section references. 
if level <= 3: if ref == topref: - # TODO: use field - ref.flags['display'] = heading - ref.clear_cache() + ref.heading = heading # Symbol is used for URL fragment - symbol = re.sub(r'[^a-zA-Z0-9- ]', '', heading.lower()) - symbol = re.sub(r' +', '_', symbol).replace('_-_', '-') + symbol = recache(r'[^a-zA-Z0-9- ]').sub('', heading.lower()) + symbol = recache(r' +').sub('_', symbol).replace('_-_', '-') # Headings don't need to be unique, so check for duplicate symbol if symbol in symbols: symbol = symbol + str(symbols[symbol] + 1) symbols[symbol] = symbols.get(symbol, 0) + 1 ref = SectionRef(self.refs, file=path, line=n, scopes=[topref], symbol=symbol) - # TODO: use field - ref.flags['display'] = heading + ref.heading = heading ref.flags['level'] = level - ref.clear_cache() if ref != topref: self._add_reference(ref) @@ -580,14 +587,19 @@ def parse_manual(self, name: str, f: IO[str]) -> None: # ref's content just below. continue - ref.content.append((n, line)) + ref.raw_content.append((n, line)) + def get_reference(self, typ: Type[Reference], name: str) -> Union[Reference, None]: + """ + Returns the Reference object for the given type and name. + """ for ref in self.parsed[typ]: if ref.name == name: return ref - def _resolve_ref(self, name: str) -> Union[Reference, None]: + + def resolve_ref(self, name: str) -> Union[Reference, None]: """ Finds the Reference object for the given reference name. @@ -620,10 +632,8 @@ def _resolve_ref(self, name: str) -> Union[Reference, None]: ref = self.refs.get(clsref.name + '.' + name) if ref: break - if not ref: - return - if ref.within and not ref.userdata.get('within_topsym'): + if ref and ref.within and 'within_topsym' not in ref.userdata: # Check to see if the @within section is in the same topsym. 
collections = self.collections[ref.topsym] if ref.within not in collections: @@ -644,6 +654,7 @@ def _resolve_ref(self, name: str) -> Union[Reference, None]: return ref + def _reorder_refs(self, refs: List[RefT], topref: Optional[Reference]=None) -> List[RefT]: """ Reorders the given list of Reference objects according to any @order tags. @@ -729,7 +740,6 @@ def get_elements_in_collection(self, typ: Type[RefT], colref: CollectionRef) -> elems: list[RefT] = [] for ref in self.parsed[typ]: - assert(isinstance(ref, typ)) if topsym and topsym != ref.topsym: # We're constraining the refs search to the given topref but this ref # doesn't belong to that topref. @@ -745,11 +755,63 @@ def get_elements_in_collection(self, typ: Type[RefT], colref: CollectionRef) -> return self._reorder_refs(elems) + def render_ref_markdown(self, ref: Reference, text: Optional[str]=None, code=False) -> str: + """ + Returns the Reference as a markdown link, using luadox:<refid> as the link target, + which can be further resolved by the downstream renderer. + + If code is True, then the given text is wrapped in backticks. + """ + tick = '`' if code else '' + parens = '()' if isinstance(ref, FunctionRef) and not text else '' + return f'[{tick}{text or ref.name}{parens}{tick}](luadox:{ref.id})' + + + def _render_ref_markdown_re(self, m: Match[str]) -> str: + """ + Regexp callback to handle the @{refname} case. + """ + code: bool = (m.group(1) == '`') + ref = self.resolve_ref(m.group(2)) + if ref: + return self.render_ref_markdown(ref, m.group(3), code=code) + else: + log.warning('%s:~%s: reference "%s" could not be resolved', self.ctx.file, self.ctx.line, m.group(2)) + return m.group(3) or m.group(2) + + + def _render_backtick_ref_markdown_re(self, m: Match[str]) -> str: + """ + Regexp callback to handle the `refname` case. 
+ """ + ref = self.resolve_ref(m.group(1)) + if ref: + return self.render_ref_markdown(ref, text=m.group(1), code=True) + else: + # Couldn't resolve the ref, just return back the original text. + return '`{}`'.format(m.group(1)) - def content_to_markdown(self, content: List[Tuple[int, str]], strip_comments=True) -> Tuple[ - Dict[str, Tuple[List[str], str]], - List[Tuple[List[str], str]], - str + + def refs_to_markdown(self, block: str) -> str: + """ + Replaces `refname` and @{refname} in the given block of text with + markdown links. + """ + # return block + # self._xxx = getattr(self, '_xxx', 0) + len(block) + # log.info('process 2: %s', self._xxx) + # Resolve `ref` + block = recache(r'(?<!`)`([^` ]+)`', re.S).sub(self._render_backtick_ref_markdown_re, block) + # Resolve @{ref} and @{ref|text}. Do this *after* `ref` in case the ref is in the + # form `@{stuff}`. + block = recache(r'(`)?@{([^}|]+)(?:\|([^}]*))?}(`)?', re.S).sub(self._render_ref_markdown_re, block) + return block + + + def parse_raw_content(self, lines: List[Tuple[int, str]], strip_comments=True) -> Tuple[ + Dict[str, Tuple[List[str], Content]], + List[Tuple[List[str], Content]], + Content ]: """ Parses a docstring block into markdown. @@ -761,97 +823,83 @@ def content_to_markdown(self, content: List[Tuple[int, str]], strip_comments=Tru tags, a list of (types, docstrings) for @treturn tags, and a string holding the converted content to markdown. 
""" - output: list[str] = [] - params: dict[str, tuple[list[str], str]] = {} - returns: list[tuple[list[str], str]] = [] - if not content: - return params, returns, '' - - # List of (tag, args, indent, lines) - tagstack: list[tuple[str, list[str], int, list[str]]] = [] - supported_tags = 'tparam', 'treturn', 'usage', 'example', 'code', 'see', 'warning', 'note' - - def end_tag(): - tag, args, indent, lines = tagstack.pop() - target = tagstack[-1][3] if tagstack else output - if tag in ('usage', 'example'): - target.append('##### ' + tag.title()) - if tag in ('usage', 'example', 'code'): - lang = 'lua' if not args else args[0] - target.append('```' + lang) - # Remove trailing newlines. - while lines and not lines[-1].strip(): - lines.pop() - # Dedent all lines according to the indentation of the - # first line. - indent = get_indent_level(lines[0]) - target.extend([l[indent:] for l in lines]) - target.append('```') - elif tag == 'tparam' and len(args) >= 2: - types = args[0].split('|') - name = args[1] - params[name] = types, ' '.join(args[2:] + lines) - elif tag == 'treturn': - types = args[0].split('|') - doc = ' '.join(args[1:] + lines) - returns.append((types, doc)) - elif tag == 'see': - refs = ['@{{{}}}'.format(see) for see in args] - target.append('\n\x01see{}\x03\n'.format(', '.join(refs))) - elif tag == 'warning' or tag == 'note': - # Admonition - heading = ' '.join(args) if args else tag.title() - code = '\n\x01adm{}\x02{}\x02{}\n\x03\n' - target.append(code.format(tag, heading, '\n'.join(lines))) - - # FIXME: this (frustratingly uncommented) function doesn't pass the smell test. 
- def end_tags(all, line: Optional[str]=None, indent: Optional[int]=None): - if not all: - end_tag() - else: - while tagstack: - end_tag() - if line and tagstack: - last_tag_indent = tagstack[-1][2] - tagstack[-1][3].append(line) - line = None - return line - - last_line = content[-1][0] - for n, line in content: + params: dict[str, tuple[list[str], Content]] = {} + returns: list[tuple[list[str], Content]] = [] + # These tags take nested content + content_tags = {'warning', 'note', 'tparam', 'treturn'} + + # We pass _refs_to_markdown() as a postprocessor for the Content (here as well as + # below) which will resolve all references when the renderer finally fetches the + # markdown content via the Markdown.get() method. + # + # List of (indent, tag, content) + stack: list[tuple[int, str, Content]] = [(0, '', Content(postprocess=self.refs_to_markdown))] + # The number of columns to dedent raw lines before adding to the parsed content. + # If None, we set this to the current line's indent level and use that as dedent + # until reset back to None. + dedent = None + # We tack on a sentinel value at the end of the raw lines which forces closure of + # all pending tags on the stack. + for n, line in lines + [(-1, '')]: self.ctx.update(line=n) tag, args = self._parse_tag(line, require_comment=strip_comments) if strip_comments: line = line.lstrip('-').rstrip() indent = get_indent_level(line) - if tagstack: - last_tag_indent = tagstack[-1][2] - # Determine threshold at which we will consider the last tag to have - # terminated. - if tag: - # Any tag at the same level as the last tag (or below) will close - threshold = last_tag_indent + while len(stack) > 1 and (line or n == -1): + if stack[-1][0] < indent: + break + _, done_tag, content = stack.pop() + if done_tag in {'usage', 'example', 'code'}: + # Remove trailing newlines from the snippet before terminating the + # markdown code block. + content.md().rstrip().append('```') + # Redetect dedent level based on next line. 
+ dedent = None + + # New content fragments are appended to the content object from the top of the + # stack. + content = stack[-1][2] + if tag: + # The Content object this tag's content will be pushed to. For tags that + # take content we initialize a new Content object, otherwise we just reuse + # the last one on the stack and append to it. + tagcontent = Content(postprocess=self.refs_to_markdown) if tag in content_tags else stack[-1][2] + stack.append((indent, tag, tagcontent)) + + if tag in {'usage', 'example', 'code'}: + if tag in {'usage', 'example'}: + # @usage and @example add a header. + content.md().append(f'##### {tag.title()}\n') + lang = 'lua' if not args else args[0] + content.md().append(f'```{lang}') + # Ensure subsequent dedent is based on the first line of the code + # block + dedent = None + elif tag in {'warning', 'note'}: + heading = self.refs_to_markdown(' '.join(args) if args else tag.title()) + content.append(Admonition(tag, heading, tagcontent)) + elif tag == 'tparam' and args and len(args) >= 2: + types = args[0].split('|') + name = args[1] + tagcontent.md().append(' '.join(args[2:])) + params[name] = types, tagcontent + elif tag == 'treturn' and args: + types = args[0].split('|') + tagcontent.md().append(' '.join(args[1:])) + returns.append((types, tagcontent)) + elif tag == 'see' and args: + refs = [self.resolve_ref(see) for see in args] + content.append(SeeAlso([ref.id for ref in refs if ref])) else: - threshold = last_tag_indent - if not tag and indent > threshold and line: - tagstack[-1][3].append(line) - line = None - if n == last_line or (line and indent <= threshold): - line = end_tags(n == last_line, line if not tag else None, indent) - - if tag and args is not None: - tagstack.append((tag, args, indent, [])) - if tag not in supported_tags: - log.error('%s:%s: unknown tag @%s', self.ctx.file, n, tag) - elif n == last_line: - end_tags(n == last_line) + log.error('%s:%s: unknown tag @%s or missing arguments', self.ctx.file, n, tag) 
+ elif line is not None: - if tagstack: - last = tagstack[-1] - last[3].append(line) - else: - output.append(line) - return params, returns, '\n'.join(output) + dedent = indent if dedent is None else dedent + content.md().append(line[dedent:]) + if len(stack) != 1: + log.error('%s:~%s: LuaDox bug: @%s is dangling', self.ctx.file, lines[-1][0], stack[-1][1]) + return params, returns, stack[0][2] diff --git a/src/prerender.py b/src/prerender.py index e6df2a4..411e1fa 100644 --- a/src/prerender.py +++ b/src/prerender.py @@ -23,9 +23,11 @@ class Prerenderer: """ - The prerender stage populates the specific typed Reference fields needed for rendering. - generates intermediate data structures used by renderers. All - references are resolved, and tags (such as @param) are parsed and validated. + The prerender stage populates the specific typed Reference fields needed for + rendering, and generates intermediate data structures used by renderers. + + All references are resolved to markdown links (whose target is in the form + luadox:<refid>), and tags (such as @tparam) are parsed and validated. """ def __init__(self, parser: Parser): self.parser = parser @@ -34,7 +36,9 @@ def __init__(self, parser: Parser): def process(self) -> List[TopRef]: """ - Preprocesses all Reference objects created by the parser by handling all remaining tags within content docstrings, normalizing content to markdown, and returns a sorted list of toprefs for rendering. + Preprocesses all Reference objects created by the parser by handling all remaining + tags within content docstrings, normalizing content to markdown, and returns a + sorted list of toprefs for rendering. 
""" toprefs: list[TopRef] = [] for ref in self.parser.topsyms.values(): @@ -46,80 +50,83 @@ def process(self) -> List[TopRef]: toprefs.sort(key=lambda ref: (ref.type, ref.symbol)) return toprefs + def _do_classmod(self, topref: Union[ClassRef, ModuleRef]): has_content = False for colref in self.parser.get_collections(topref): self.ctx.update(ref=colref) # Parse out section heading and body. - _, _, md = self.parser.content_to_markdown(colref.content) + _, _, content = self.parser.parse_raw_content(colref.raw_content) if isinstance(colref, (ClassRef, ModuleRef)): heading = colref.symbol - body = md else: - heading, body = get_first_sentence(md) + heading = content.get_first_sentence(pop=True) # Fall back to section name if there is no content for the heading. heading = heading.strip() or colref.name colref.heading = heading - colref.body = body + colref.content = content topref.collections.append(colref) functions = list(self.parser.get_elements_in_collection(FunctionRef, colref)) fields = list(self.parser.get_elements_in_collection(FieldRef, colref)) - has_content = has_content or colref.body or functions or fields + has_content = has_content or colref.content or functions or fields colref.compact = colref.flags.get('compact', []) fullnames: bool = colref.flags.get('fullnames', False) for ref in fields: self.ctx.update(ref=ref) - _, _, md = self.parser.content_to_markdown(ref.content) + _, _, content = self.parser.parse_raw_content(ref.raw_content) ref.title = ref.flags.get('display') or (ref.name if fullnames else ref.symbol) ref.types = ref.flags.get('type', []) ref.meta = ref.flags.get('meta') - ref.md = md + ref.content = content colref.fields.append(ref) for ref in functions: self.ctx.update(ref=ref) - paramsdict, returns, md = self.parser.content_to_markdown(ref.content) + paramsdict, returns, content = self.parser.parse_raw_content(ref.raw_content) # args is as defined in the function definition in source, while params is # based on tags. 
Log a warning for any undocumented argument as long as # there is at least one documented parameter. - params: List[Tuple[str, List[str], str]] = [] + params: List[Tuple[str, List[str], Content]] = [] # ref.extra contains the list of parameter names as parsed from the # source. Construct the params list based on for param in ref.extra: try: params.append((param, *paramsdict[param])) except KeyError: - params.append((param, [], '')) + params.append((param, [], Content())) if paramsdict: log.warning('%s:%s: %s() missing @tparam for "%s" parameter', ref.file, ref.line, ref.name, param) ref.title = ref.display ref.params = params ref.returns = returns - ref.meta = ref.flags.get('meta') - ref.md = md + ref.meta = self.parser.refs_to_markdown(ref.flags['meta']) if 'meta' in ref.flags else '' + ref.content = content colref.functions.append(ref) topref.userdata['empty'] = not has_content + def _do_manual(self, topref: ManualRef): - if topref.content: + if topref.raw_content: + self.ctx.update(ref=topref) # Include any preamble before the first heading. 
- _, _, md = self.parser.content_to_markdown(topref.content, strip_comments=False) - topref.md = md + _, _, content = self.parser.parse_raw_content(topref.raw_content, strip_comments=False) + topref.content = content + topref.heading = self.parser.refs_to_markdown(topref.heading) for ref in self.parser.get_collections(topref): # Manuals only have SectionRefs assert(isinstance(ref, SectionRef)) self.ctx.update(ref=ref) - _, _, md = self.parser.content_to_markdown(ref.content, strip_comments=False) - ref.heading = ref.display - ref.body = md + _, _, content = self.parser.parse_raw_content(ref.raw_content, strip_comments=False) + ref.heading = self.parser.refs_to_markdown(ref.heading) + ref.content = content ref.level = int(ref.flags['level']) topref.collections.append(ref) diff --git a/src/reference.py b/src/reference.py index de0e7cb..bf621a0 100644 --- a/src/reference.py +++ b/src/reference.py @@ -23,6 +23,7 @@ from typing import TypeVar, Optional, Union, List, Tuple, Dict, Any from .log import log +from .utils import Content # Used for generics taking Reference types RefT = TypeVar('RefT', bound='Reference') @@ -97,7 +98,9 @@ class Reference: # A list of lines containing the documented content for this collection. Each element # is a 2-tuple in the form (line number, text) where line number is the specific line # in self.file where the comment appears, and text is in markdown format. - content: List[Tuple[int, str]] = field(default_factory=list) + raw_content: List[Tuple[int, str]] = field(default_factory=list) + # The processed (from raw) content which is set during the prerender stage + content: Content = field(default_factory=Content) # A map of modifiers that apply to this Reference that affect how it is rendered, # mostly from @tags. 
These are accumulated in the flags dict until all parsing # is done and then the parser process stage will convert these to proper fields in the @@ -171,10 +174,19 @@ def name(self) -> str: assert(self._name) return self._name + @property + def id(self) -> str: + """ + A globally unique identifier of the Reference. + """ + return f'{self.topref.type}#{self.topsym}#{self.name}' + @property def topsym(self) -> str: """ Returns the symbol name of our top-level reference. + + This does *not* honor @within. """ if not self._topsym: self._set_topsym() @@ -187,6 +199,8 @@ def topref(self) -> 'Reference': Returns the Reference object for the top-level reference this ref belongs to. If we're already a top-level Ref (e.g. class or module) then self is returned. + + This does *not* honor @within. """ # If there are no scopes, we *are* the topref return self if not self.scopes else self.parser_refs[self.topsym] @@ -280,8 +294,6 @@ class FieldRef(Reference): # User-defined meta value (parsed from @meta via flags) meta: Optional[str] = None - # Markdown content of the field - md: str = '' # Renderable display name that takes tags such as @fullnames into account title: str = '' # Allowed types for this field, which can be empty if no @type tag @@ -343,9 +355,9 @@ class FunctionRef(FieldRef): type: str = 'function' # List of (name, types, docstring) - params: List[Tuple[str, List[str], str]] = field(default_factory=list) - # List of (types, markdown)j - returns: List[Tuple[List[str], str]] = field(default_factory=list) + params: List[Tuple[str, List[str], Content]] = field(default_factory=list) + # List of (types, docstring) + returns: List[Tuple[List[str], Content]] = field(default_factory=list) @dataclass @@ -354,7 +366,6 @@ class CollectionRef(Reference): A collection can fields and functions, """ heading: str = '' - body: str = '' # List of 'functions' and/or 'fields' to indicate which should be rendered in compact # form compact: List[str] = field(default_factory=list) @@ 
-366,8 +377,6 @@ class TopRef(CollectionRef): """ Represents a top-level reference such as class or module. """ - # Preamble before any sections - md: str = '' # Ordered list of collections within this topref, which respects @within and @reorder collections: List[CollectionRef] = field(default_factory=list) diff --git a/src/render.py b/src/render.py index 94eb694..a238a66 100644 --- a/src/render.py +++ b/src/render.py @@ -19,7 +19,7 @@ import re import mimetypes from contextlib import contextmanager -from typing import Union, Match, Tuple, List, Callable, Generator, Type +from typing import Union, Tuple, List, Callable, Generator, Type import commonmark.blocks import commonmark_extensions.tables @@ -30,19 +30,46 @@ from .parse import * from .utils import * +# Files from the assets directory to be copied +ASSETS = [ + 'luadox.css', + 'prism.css', + 'prism.js', + 'js-search.min.js', + 'search.js', + 'img/i-left.svg', + 'img/i-right.svg', + 'img/i-download.svg', + 'img/i-github.svg', + 'img/i-gitlab.svg', + 'img/i-bitbucket.svg', +] + # Effectively disable implicit code blocks commonmark.blocks.CODE_INDENT = 1000 - class CustomRendererWithTables(commonmark_extensions.tables.RendererWithTables): - def make_table_node(self, node): + def __init__(self, renderer: 'Renderer', *args, **kwargs): + self.renderer = renderer + self.parser = renderer.parser + super().__init__(*args, **kwargs) + + def make_table_node(self, _): return '<table class="user">' + def link(self, node, entering): + if node.destination.startswith('luadox:'): + refid = node.destination[7:] + # If this raises KeyError it indicates a bug in the parser code + ref = self.parser.refs_by_id[refid] + node.destination = self.renderer._get_ref_href(ref) + super().link(node, entering) + # https://github.com/GovReady/CommonMark-py-Extensions/issues/3#issuecomment-756499491 # Thanks to hughdavenport class TableWaitingForBug3(commonmark_extensions.tables.Table): @staticmethod - def continue_(parser, container=None): + 
def continue_(parser, _=None): ln = parser.current_line if not parser.indented and commonmark.blocks.peek(ln, parser.next_nonspace) == "|": parser.advance_next_nonspace() @@ -59,22 +86,6 @@ class Renderer: """ Takes a Parser object and provides an interface to generate rendered HTML. """ - # Common abbreviations with periods that are considered when determining what is the - # first sentence of a markdown block - RE_ABBREV = re.compile(r'(e\.?g\.|i\.?e\.|etc\.|et al\.|vs\.)', flags=re.I|re.S) - # Regexp patterns that progressively narrow down a markdown block to its first - # sentence - RE_FIRST_SENTENCE = ( - # First pass: Move everything after a paragraph break (two newlines) to - # the remaining block - re.compile(r'^(.*\n\s*\n)(.*)$', flags=re.S), - # Second pass: Move (prepend) anything including and below a markdown heading - # to the remaining block. Fixes #6. - re.compile(r'(.*)(?:^|\n)(#.*)', flags=re.S), - # Final pass: take everything up to the first period as the first sentence. 
- re.compile(r'^(.+?[.?!])(?: |$|\n)(.*)', flags=re.S), - ) - def __init__(self, parser: Parser): self.parser = parser self.config = parser.config @@ -120,6 +131,7 @@ def _get_ref_link_info(self, ref: Reference) -> Tuple[str, str]: topref = self.parser.refs[topsym] except KeyError: raise KeyError('top-level reference "%s" not found (from "%s")' % (topsym, ref.name)) from None + prefix = self._get_root_path() if not isinstance(ref.topref, ManualRef) or ref.topref.name != 'index': prefix += '{}/'.format(topref.type) @@ -128,8 +140,7 @@ def _get_ref_link_info(self, ref: Reference) -> Tuple[str, str]: fragment = '#' + ref.symbol if ref.scopes else '' else: fragment = '#{}'.format(ref.name) if ref.name != ref.topsym else '' - - return prefix + (ref.userdata.get('within_topsym') or ref.topsym) + '.html', fragment + return prefix + topsym + '.html', fragment def _get_ref_href(self, ref: Reference) -> str: """ @@ -139,104 +150,76 @@ def _get_ref_href(self, ref: Reference) -> str: file, fragment = self._get_ref_link_info(ref) return file + fragment - def _render_ref_markdown(self, ref: Reference, text: str, code=False) -> str: - """ - Returns the Reference as a markdown link. - - If code is True, then the given text is wrapped in backticks. - """ - backtick = '`' if code else '' - return '[{tick}{text}{parens}{tick}]({href})'.format( - tick=backtick, - text=text or ref.name, - parens='()' if isinstance(ref, FunctionRef) and not text else '', - href=self._get_ref_href(ref) - ) - - def _render_ref_markdown_re(self, m: Match[str]) -> str: - """ - Regexp callback to handle the @{refname} case. 
- """ - code: bool = (m.group(1) == '`') - ref = self.parser._resolve_ref(m.group(2)) - if ref: - return self._render_ref_markdown(ref, m.group(3), code=code) - else: - log.warning('%s:~%s: reference "%s" could not be resolved', self.ctx.file, self.ctx.line, m.group(2)) - return m.group(3) or m.group(2) - - def _render_backtick_ref_markdown_re(self, m: Match[str]) -> str: - """ - Regexp callback to handle the `refname` case. - """ - ref = self.parser._resolve_ref(m.group(1)) - if ref: - return self._render_ref_markdown(ref, text=m.group(1), code=True) - else: - # Couldn't resolve the ref, just return back the original text. - return '`{}`'.format(m.group(1)) - - def _refs_to_markdown(self, block: str) -> str: + def _permalink(self, id: str) -> str: """ - Replaces `refname` and @{refname} in the given block of text with - markdown links. + Returns the HTML for a permalink used for directly linkable references such + as section headings, functions, fields, etc. """ - # Resolve `ref` - block = re.sub(r'(?<!`)`([^` ]+)`', self._render_backtick_ref_markdown_re, block, 0, re.S) - # Resolve @{ref} and @{ref|text}. Do this *after* `ref` in case the ref is in the - # form `@{stuff}`. - block = re.sub(r'(`)?@{([^}|]+)(?:\|([^}]*))?}(`)?', self._render_ref_markdown_re, block, 0, re.S) - return block + return '<a class="permalink" href="#{}" title="Permalink to this definition">¶</a>'.format(id) def _markdown_to_html(self, md: str) -> str: """ Renders the given markdown as HTML and returns the result. 
""" - md = self._refs_to_markdown(md) parser = commonmark_extensions.tables.ParserWithTables() ast = parser.parse(md) - html = CustomRendererWithTables().render(ast) - - def replace_admonition(m: Match[str]): - type, title, content = m.group(1).split('\x02') - # content = content.replace('<p></p>', '') - return '<div class="admonition {}"><div class="title">{}</div><div class="body"><p>{}</p>\n</div></div>'.format(type, title, content.rstrip()) - - html = re.sub(r'<p>\x01adm([^\x03]+)\x03\s*</p>', replace_admonition, html, flags=re.S) - html = re.sub(r'<p>\x01see([^\x03]+)\x03\s*</p>', '<div class="see">See also \\1</div>', html, flags=re.S) - return html + return CustomRendererWithTables(self).render(ast) + + def _content_to_html(self, content: Content) -> str: + output = [] + for elem in content: + if isinstance(elem, Markdown): + output.append(self._markdown_to_html(elem.get())) + elif isinstance(elem, Admonition): + inner = self._content_to_html(elem.content) + output.append(f'<div class="admonition {elem.type}"><div class="title">{elem.title}</div><div class="body">{inner.strip()}\n</div></div>') + elif isinstance(elem, SeeAlso): + refs = [self.parser.refs_by_id[id] for id in elem.refs] + md = ', '.join(self.parser.render_ref_markdown(ref) for ref in refs) + # HTML will have <p></p> tags so strip them out first. + html = self._markdown_to_html(md).strip()[3:-4] + output.append(f'<div class="see">See also {html}</div>') + else: + raise ValueError(f'unsupported content fragment type {type(elem)}') + return '\n'.join(output) def _markdown_to_text(self, md: str) -> str: """ - Strips markdown codes from the given markdown and returns the result. + Strips markdown codes from the given Markdown and returns the result. 
""" # Code blocks - text = re.sub(r'```.*?```', '', md, flags=re.S) + text = recache(r'```.*?```', re.S).sub('', md) # Inline preformatted code - text = re.sub(r'`([^`]+)`', '\\1', text) + text = recache(r'`([^`]+)`').sub('\\1', text) # Headings - text = re.sub(r'#+', '', text) + text = recache(r'#+').sub('', text) # Bold - text = re.sub(r'\*([^*]+)\*', '\\1', text) + text = recache(r'\*([^*]+)\*').sub('\\1', text) # Link or inline image - text = re.sub(r'!?\[([^]]*)\]\([^)]+\)', '\\1', text) + text = recache(r'!?\[([^]]*)\]\([^)]+\)').sub('\\1', text) # Clean up non-markdown things. # Reference with custom display - text = re.sub(r'@{[^|]+\|([^}]+)\}', '\\1', text) + text = recache(r'@{[^|]+\|([^}]+)\}').sub('\\1', text) # Just a reference - text = re.sub(r'@{([^}]+)\}', '\\1', text) - # Replace admonissions with text elements - text = re.sub( - r'\x01adm[^\x02]+\x02([^\x03]+)\x03', - lambda m: m.group(1).replace('\x02', ' '), - text, - flags=re.S) - # Remove other special encoded content - text = re.sub(r'\x01([^\x03]*)\x03', '', text) + text = recache(r'@{([^}]+)\}').sub('\\1', text) # Consolidate multiple whitespaces - text = re.sub(r'\s+', ' ', text) - return text.strip() + text = recache(r'\s+').sub(' ', text) + return text + + def _content_to_text(self, content: Content) -> str: + """ + Strips markdown codes from the given Content and returns the result. 
+ """ + output = [] + for elem in content: + if isinstance(elem, Admonition): + output.append(self._markdown_to_text(elem.title)) + output.append(self._content_to_text(elem.content)) + elif isinstance(elem, Markdown): + output.append(self._markdown_to_text(elem.get())) + return '\n'.join(output).strip() + def _types_to_html(self, types: List[str]) -> str: """ @@ -245,7 +228,7 @@ def _types_to_html(self, types: List[str]) -> str: """ resolved: list[str] = [] for tp in types: - ref = self.parser._resolve_ref(tp) + ref = self.parser.resolve_ref(tp) if ref: href = self._get_ref_href(ref) tp = '<a href="{}">{}</a>'.format(href, tp) @@ -303,9 +286,11 @@ def _render_html(self, topref: TopRef, lines: List[str]) -> Generator[ out = lines.append root = self._get_root_path() head: list[str] = [] + css = self.config.get('project', 'css', fallback=None) if css: head.append('<link href="{}{}?{}" rel="stylesheet" />'.format(root, css, self._assets_version)) + favicon = self.config.get('project', 'favicon', fallback=None) if favicon: mimetype, _ = mimetypes.guess_type(favicon) @@ -313,6 +298,7 @@ def _render_html(self, topref: TopRef, lines: List[str]) -> Generator[ # Favicon is always copied to doc root, so take only the filename _, favicon = os.path.split(favicon) head.append('<link rel="shortcut icon" {} href="{}{}?{}"/>'.format(mimetype, root, favicon, self._assets_version)) + out(self._templates['head'].format( version=self._assets_version, title=html_title, @@ -323,7 +309,7 @@ def _render_html(self, topref: TopRef, lines: List[str]) -> Generator[ # as Search page) fall back to 'other' topref.type or 'other', # Second segment is the stripped form of the ref name. - re.sub(r'\W+', '', topref.name).lower() + recache(r'\W+').sub('', topref.name).lower() ) )) @@ -404,7 +390,7 @@ def _render_html(self, topref: TopRef, lines: List[str]) -> Generator[ # it in the list of manual pages. 
continue cls = ' class="selected"' if ref.name == topref.name else '' - out('<li{}><a href="{}">{}</a></li>'.format(cls, self._get_ref_href(ref), ref.display)) + out('<li{}><a href="{}">{}</a></li>'.format(cls, self._get_ref_href(ref), ref.heading)) out('</ul>') out('</div>') @@ -440,10 +426,9 @@ def _render_html(self, topref: TopRef, lines: List[str]) -> Generator[ out('</div>') out(self._templates['foot'].format(root=root, version=self._assets_version)) - def render(self, topref: TopRef) -> str: + def _render_topref(self, topref: TopRef) -> str: """ - Renders a prerendered Page to HTML, returning a string containing the rendered - HTML. + Renders a topref to HTML, returning a string containing the rendered HTML. """ lines = [] with self._render_html(topref, lines) as out: @@ -453,29 +438,22 @@ def render(self, topref: TopRef) -> str: self._render_manual(topref, out) return '\n'.join(lines) - def _permalink(self, id: str) -> str: - """ - Returns the HTML for a permalink used for directly linkable references such - as section headings, functions, fields, etc. - """ - return '<a class="permalink" href="#{}" title="Permalink to this definition">¶</a>'.format(id) - def _render_manual(self, topref: ManualRef, out: Callable[[str], None]) -> None: """ Renders the given manual top-level Reference as HTML, calling the given out() function for each line of HTML. 
""" out('<div class="manual">') - if topref.md: + if topref.content: # Preamble - out(self._markdown_to_html(topref.md)) + out(self._content_to_html(topref.content)) for secref in topref.collections: # Manual pages only contain SectionRefs assert(isinstance(secref, SectionRef)) out('<h{} id="{}">{}'.format(secref.level, secref.symbol, secref.heading)) out(self._permalink(secref.symbol)) out('</h{}>'.format(secref.level)) - out(self._markdown_to_html(secref.body)) + out(self._content_to_html(secref.content)) out('</div>') def _render_classmod(self, topref: Union[ClassRef, ModuleRef], out: Callable[[str], None]) -> None: @@ -522,8 +500,8 @@ def _render_classmod(self, topref: Union[ClassRef, ModuleRef], out: Callable[[st out('</ul>') out('</div>') - if colref.body: - out(self._markdown_to_html(colref.body)) + if colref.content: + out(self._content_to_html(colref.content)) fields_title = 'Fields' fields_meta_columns = 0 @@ -582,9 +560,12 @@ def _render_classmod(self, topref: Union[ClassRef, ModuleRef], out: Callable[[st out('<td class="meta"></td>') nmeta -= 1 - md = get_first_sentence(ref.md)[0] if not fields_compact else ref.md - if md: - out('<td class="doc">{}</td>'.format(self._markdown_to_html(md))) + if not fields_compact: + html = self._markdown_to_html(ref.content.get_first_sentence()) + else: + html = self._content_to_html(ref.content) + if html: + out('<td class="doc">{}</td>'.format(html)) out('</tr>') out('</table>') @@ -611,8 +592,11 @@ def _render_classmod(self, topref: Union[ClassRef, ModuleRef], out: Callable[[st out('<td class="meta"></td>') meta -= 1 - md = get_first_sentence(ref.md)[0] if not functions_compact else ref.md - out('<td class="doc">{}</td>'.format(self._markdown_to_html(md))) + if not functions_compact: + html = self._markdown_to_html(ref.content.get_first_sentence()) + else: + html = self._content_to_html(ref.content) + out('<td class="doc">{}</td>'.format(html)) out('</tr>') out('</table>') out('</div>') @@ -635,7 +619,7 @@ def 
_render_classmod(self, topref: Union[ClassRef, ModuleRef], out: Callable[[st out(self._permalink(ref.name)) out('</dt>') out('<dd>') - out(self._markdown_to_html(ref.md)) + out(self._content_to_html(ref.content)) out('</dd>') out('</dl>') @@ -655,7 +639,7 @@ def _render_classmod(self, topref: Union[ClassRef, ModuleRef], out: Callable[[st out(self._permalink(ref.name)) out('</dt>') out('<dd>') - out(self._markdown_to_html(ref.md)) + out(self._content_to_html(ref.content)) # Only show the praameters table if there's at least one documented parameter. if any(types or doc for _, types, doc in ref.params): out('<div class="heading">Parameters</div>') @@ -664,7 +648,7 @@ def _render_classmod(self, topref: Union[ClassRef, ModuleRef], out: Callable[[st out('<tr>') out('<td class="name"><var>{}</var></td>'.format(param)) out('<td class="types">({})</td>'.format(self._types_to_html(types))) - out('<td class="doc">{}</td>'.format(self._markdown_to_html(doc))) + out('<td class="doc">{}</td>'.format(self._content_to_html(doc))) out('</tr>') out('</table>') if ref.returns: @@ -675,7 +659,7 @@ def _render_classmod(self, topref: Union[ClassRef, ModuleRef], out: Callable[[st if len(ref.returns) > 1: out('<td class="name">{}.</td>'.format(n)) out('<td class="types">({})</td>'.format(self._types_to_html(types))) - out('<td class="doc">{}</td>'.format(self._markdown_to_html(doc))) + out('<td class="doc">{}</td>'.format(self._content_to_html(doc))) out('</tr>') out('</table>') out('</dd>') @@ -691,10 +675,9 @@ def render_search_index(self) -> str: self.ctx.update(ref=topref) lines = [] out = lines.append - def add(ref: Reference, typ: Type[Reference]): + def add(ref: RefT, typ: Type[RefT]): href = self._get_ref_href(ref) - _, _, md = self.parser.content_to_markdown(ref.content) - text = self._markdown_to_text(md) + text = self._content_to_text(ref.content) title = ref.display if typ == SectionRef and not isinstance(ref.topref, ManualRef): # Non-manual sections typically use the first 
sentence as the section @@ -740,3 +723,48 @@ def render_landing_page(self) -> str: with self._render_html(topref, lines): pass return '\n'.join(lines) + + + def render(self, toprefs: List[TopRef], outdir: str) -> None: + """ + Renders all toprefs to the given output directory. + + It's the caller's obligation to have passed these toprefs through the prerenderer. + """ + for ref in toprefs: + if ref.userdata.get('empty') and ref.implicit: + # Reference has no content and it was also implicitly generated, so we don't render it. + log.info('not rendering empty %s %s', ref.type, ref.name) + continue + if isinstance(ref, ManualRef) and ref.name == 'index': + typedir = outdir + else: + typedir = os.path.join(outdir, ref.type) + os.makedirs(typedir, exist_ok=True) + outfile = os.path.join(typedir, ref.name + '.html') + log.info('rendering %s %s -> %s', ref.type, ref.name, outfile) + html = self._render_topref(ref) + with open(outfile, 'w', encoding='utf8') as f: + f.write(html) + + js = self.render_search_index() + with open(os.path.join(outdir, 'index.js'), 'w', encoding='utf8') as f: + f.write(js) + + html = self.render_search_page() + with open(os.path.join(outdir, 'search.html'), 'w', encoding='utf8') as f: + f.write(html) + + if not self.parser.get_reference(ManualRef, 'index'): + # The user hasn't specified an index manual page, so we generate a blank + # landing page that at least presents the sidebar with available links. + html = self.render_landing_page() + with open(os.path.join(outdir, 'index.html'), 'w', encoding='utf8') as f: + f.write(html) + + for name in ASSETS: + outfile = os.path.join(outdir, name) + if os.path.dirname(name): + os.makedirs(os.path.dirname(outfile), exist_ok=True) + with open(outfile, 'wb') as f: + f.write(assets.get(name)) diff --git a/src/utils.py b/src/utils.py index 9297366..55c1d43 100644 --- a/src/utils.py +++ b/src/utils.py @@ -13,30 +13,144 @@ # limitations under the License. 
__all__ = [ - 'Sentinel', 'get_first_sentence', 'get_indent_level', 'strip_trailing_comment' + 'Sentinel', 'Content', 'ContentFragment', 'Markdown', 'Admonition', 'SeeAlso', + 'recache', 'get_first_sentence', 'get_indent_level', 'strip_trailing_comment', ] import enum import re -from typing import Tuple +import string +from dataclasses import dataclass +from functools import lru_cache +from typing import Tuple, List, Callable, Optional, Pattern # Common abbreviations with periods that are considered when determining what is the -# first sentence of a markdown block -RE_ABBREV = re.compile(r'(e\.?g\.|i\.?e\.|etc\.|et al\.|vs\.)', flags=re.I|re.S) -# Regexp patterns that progressively narrow down a markdown block to its first -# sentence -RE_FIRST_SENTENCE = ( - # First pass: Move everything after a paragraph break (two newlines) to - # the remaining block - re.compile(r'^(.*\n\s*\n)(.*)$', flags=re.S), - # Second pass: Move (prepend) anything including and below a markdown heading - # to the remaining block. Fixes #6. - re.compile(r'(.*)(?:^|\n)(#.*)', flags=re.S), - # Final pass: take everything up to the first period as the first sentence. - re.compile(r'^(.+?[.?!])(?: |$|\n)(.*)', flags=re.S), -) -RE_INDENT = re.compile(r'^( *)') -RE_COMMENT = re.compile(r'--.*') +# first sentence of a markdown block. +ABBREV = { + 'e': ('e.g.', 'eg.', 'etc.', 'et al.'), + 'i': ('i.e.', 'ie.'), + 'v': ('vs.',), +} +# Used for detecting word boundaries. Anything *not* in this set can be considered as a +# word boundary. +WORD_CHARS = set(string.ascii_lowercase) + +# Callback type used by content objects for postprocessing finalized content. Used for +# converting refs to markdown links. +PostProcessFunc = Optional[Callable[[str], str]] + +class ContentFragment: + """ + Base class for elements of a Content list. + """ + pass + + +class Markdown(ContentFragment): + """ + Represents a markdown string. 
+ """ + def __init__(self, value: Optional[str] = None, postprocess: Optional[PostProcessFunc]=None): + # Lines accumulated via append() + self._lines = [value] if value is not None else [] + self._postprocess = postprocess + # Cached postprocessed value + # append() is called between get() calls (this case is rare or nonexistent) + self._value: str|None = None + + def append(self, s: str) -> 'Markdown': + """ + Appends a line to the markdown string. Cannot be called after get(). + """ + assert(self._value is None) + self._lines.append(s) + return self + + def rstrip(self) -> 'Markdown': + """ + Removes trailing whitespace from the current set of lines added by append(). + """ + self._lines = ['\n'.join(self._lines).rstrip()] + return self + + def get(self) -> str: + """ + Returns the final markdown string, postprocessed if a postprocessor was passed during initialization. + + append() cannot be called after this point. + """ + if self._value is None: + md = '\n'.join(self._lines) + if self._postprocess: + md = self._postprocess(md) + self._value = md + del self._lines[:] + return self._value + + +@dataclass +class Admonition(ContentFragment): + """ + A @note or @warning admonition tag. + """ + type: str + title: str + content: 'Content' + + +@dataclass +class SeeAlso(ContentFragment): + """ + A @see tag. + """ + # List of ref ids. + refs: List[str] + + +class Content(List[ContentFragment]): + """ + Parsed and prerendered content. The prerender stage resolves all references to + 'luadox:' markdown links. + + Content is captured as a list of content fragments -- the most common of which is + Markdown -- where fragments are different types of objects that the renderer needs to + decide how to translate. + """ + def __init__(self, *args, postprocess: PostProcessFunc = None): + super().__init__(*args) + self._md_postprocess = postprocess + self._first = None + + def get_first_sentence(self, pop=False) -> str: + """ + Returns the first sentence from the content. 
If pop is True then the content + is updated in-place to remove the sentence that was returned. + """ + if len(self) == 0: + return '' + e = self[0] + if not isinstance(e, Markdown): + return '' + first, remaining = get_first_sentence(e.get()) + if pop: + if remaining: + self[0] = Markdown(remaining) + else: + self.pop(0) + return first + + def md(self, postprocess: PostProcessFunc = None) -> Markdown: + """ + Convenience method that returns the last fragment in the content list if it's a + Markdown, or creates and appends a new one if the last element isn't Markdown. + """ + if len(self) > 0 and isinstance(self[-1], Markdown): + md = self[-1] + assert(isinstance(md, Markdown)) + else: + md = Markdown(postprocess=postprocess or self._md_postprocess) + self.append(md) + return md class Sentinel(enum.Enum): @@ -46,36 +160,62 @@ class Sentinel(enum.Enum): UNDEF = object() -def get_first_sentence(md: str) -> Tuple[str, str]: +@lru_cache(maxsize=None) +def recache(pattern: str, flags: int = 0) -> Pattern[str]: + """ + Returns a compiled regexp pattern, caching the result for subsequent invocations. + """ + return re.compile(pattern, flags) + + +def get_first_sentence(s: str) -> Tuple[str, str]: """ - Returns a 2-tuple of the first sentence from the given markdown, and - all remaining. + Returns a 2-tuple of the first sentence from the given markdown, and all remaining. """ - # This is rather cheeky, but just handles these common abbreviations so they don't - # interpreted as end-of-sentence. 
- escape = lambda m: m.group(1).replace('.', '\x00') - unescape = lambda s: s.replace('\x00', '.') - first = RE_ABBREV.sub(escape, md) - remaining = '' - for pat in RE_FIRST_SENTENCE: - m = pat.search(first) - if m: - first, pre = m.groups() - remaining = pre + remaining - # Remove period but preserve other sentence-ending punctuation from first - # sentence - return unescape(first).strip().rstrip('.'), unescape(remaining).strip() + # This is fairly low level looking code, but it performs reasonably well for what it + # does. + l = s.lower() + end = len(l) - 1 + last = '' + n = 0 + while n <= end: + c = l[n] + if c == '\n' and last == '\n': + # Treat two consecutive newlines as a sentence terminator. + break + elif c == '.': + # Is this period followed by whitespace or EOL? + if n == end or l[n+1] == ' ' or l[n+1] == '\n': + # Found end-of-sentence. + break + elif c in ABBREV and last not in WORD_CHARS: + # This character appears to start a word of an abbreviation we want to handle. + # If the next set of characters matches an abbrevation variation, skip over + # it. + for abbr in ABBREV[c]: + if l[n:n+len(abbr)] == abbr: + # Subtract 1 from the abbrevation length since we're adding 1 below + n += len(abbr) - 1 + break + last = l[n] + n += 1 + else: + # Didn't break out of while loop so we weren't able to find end-of-sentence. + # Consider the entire given string as the first sentence. + return s, '' + + # If we're here, n represents the position of the end of first sentence. + return s[:n], s[n+1:].strip() def get_indent_level(s: str) -> int: """ Returns the number of spaces on left side of the string. """ - m = RE_INDENT.search(s) + m = recache(r'^( *)').search(s) return len(m.group(1)) if m else 0 def strip_trailing_comment(line: str) -> str: - return RE_COMMENT.sub('', line) - + return recache(r'--.*').sub('', line)