diff --git a/sdks/python/apache_beam/yaml/generate_yaml_docs.py b/sdks/python/apache_beam/yaml/generate_yaml_docs.py index 25df4a142c83..6ff4063088ce 100644 --- a/sdks/python/apache_beam/yaml/generate_yaml_docs.py +++ b/sdks/python/apache_beam/yaml/generate_yaml_docs.py @@ -16,7 +16,7 @@ # import argparse -import contextlib +import io import re import yaml @@ -154,7 +154,8 @@ def transform_docs(t, providers): return '\n'.join([ f'## {t}', '', - longest(lambda p: p.description(t), providers), + longest(lambda p: p.description(t), + providers).replace('::\n', '\n\n :::yaml\n'), '', '### Configuration', '', @@ -162,6 +163,8 @@ def transform_docs(t, providers): '', '### Usage', '', + ' :::yaml', + '', indent(longest(lambda p: pretty_example(p, t), providers), 4), ]) @@ -169,6 +172,7 @@ def transform_docs(t, providers): def main(): parser = argparse.ArgumentParser() parser.add_argument('--markdown_file') + parser.add_argument('--html_file') parser.add_argument('--schema_file') parser.add_argument('--include', default='.*') parser.add_argument( @@ -179,52 +183,122 @@ def main(): with subprocess_server.SubprocessServer.cache_subprocesses(): json_config_schemas = [] - with contextlib.ExitStack() as stack: - if options.markdown_file: - markdown_out = stack.enter_context(open(options.markdown_file, 'w')) - providers = yaml_provider.standard_providers() - for transform in sorted(providers.keys(), key=io_grouping_key): - if include(transform) and not exclude(transform): - print(transform) - if options.markdown_file: - markdown_out.write(transform_docs(transform, providers[transform])) - markdown_out.write('\n\n') - if options.schema_file: - schema = providers[transform][0].config_schema(transform) - if schema: - json_config_schemas.append({ - 'if': { - 'properties': { - 'type': { - 'const': transform - } - } - }, - 'then': { - 'properties': { - 'config': { - 'type': 'object', - 'properties': { - '__line__': { - 'type': 'integer' - }, - '__uuid__': {}, - **{ - f.name: json_utils.beam_type_to_json_type( - f.type) - for f in schema.fields - } - }, - 'additionalProperties': False, - } - } - } - }) + markdown_out = io.StringIO() + providers = yaml_provider.standard_providers() + for transform in sorted(providers.keys(), key=io_grouping_key): + if include(transform) and not exclude(transform): + print(transform) + if options.markdown_file: + markdown_out.write(transform_docs(transform, providers[transform])) + markdown_out.write('\n\n') + if options.schema_file: + schema = providers[transform][0].config_schema(transform) + if schema: + json_config_schemas.append({ + 'if': { + 'properties': { + 'type': { + 'const': transform + } + } + }, + 'then': { + 'properties': { + 'config': { + 'type': 'object', + 'properties': { + '__line__': { + 'type': 'integer' + }, + '__uuid__': {}, + **{ + f.name: json_utils.beam_type_to_json_type( + f.type) + for f in schema.fields + } + }, + 'additionalProperties': False, + } + } + } + }) if options.schema_file: with open(options.schema_file, 'w') as fout: yaml.dump(json_config_schemas, fout, sort_keys=False) + if options.markdown_file: + with open(options.markdown_file, 'w') as fout: + fout.write(markdown_out.getvalue()) + + if options.html_file: + import markdown + import markdown.extensions.toc + import pygments.formatters + + title = 'Beam YAML Transform Index' + md = markdown.Markdown( + extensions=[ + markdown.extensions.toc.TocExtension(toc_depth=2), + 'codehilite', + ]) + html = md.convert(markdown_out.getvalue()) + pygments_style = pygments.formatters.HtmlFormatter().get_style_defs( + '.codehilite') + extra_style = ''' + .nav { + height: 100%; + width: 12em; + position: fixed; + top: 0; + left: 0; + overflow-x: hidden; + } + .nav a { + color: #333; + padding: .2em; + display: block; + text-decoration: none; + } + .nav a:hover { + color: #888; + } + .nav li { + list-style-type: none; + margin: 0; + padding: 0; + } + .content { + margin-left: 12em; + } + h2 { + margin-top: 2em; + } + ''' + + with open(options.html_file, 'w') as fout: + fout.write( + f''' + + + {title} + + + + +
+

{title}

+ {html} +
+ + + ''') + if __name__ == '__main__': main() diff --git a/sdks/python/apache_beam/yaml/yaml_provider.py b/sdks/python/apache_beam/yaml/yaml_provider.py index 615934090eb8..ae98449a1cdf 100755 --- a/sdks/python/apache_beam/yaml/yaml_provider.py +++ b/sdks/python/apache_beam/yaml/yaml_provider.py @@ -1039,9 +1039,15 @@ def config_schema(self, type): missing = set(self._mappings[type].values()) - set( underlying_schema_fields.keys()) if missing: - raise ValueError( - f"Mapping destinations {missing} for {type} are not in the " - f"underlying config schema {list(underlying_schema_fields.keys())}") + if 'kwargs' in underlying_schema_fields.keys(): + # These are likely passed by keyword argument dict rather than missing. + for field_name in missing: + underlying_schema_fields[field_name] = schema_pb2.Field( + name=field_name, type=typing_to_runner_api(Any)) + else: + raise ValueError( + f"Mapping destinations {missing} for {type} are not in the " + f"underlying config schema {list(underlying_schema_fields.keys())}") def with_name( original: schema_pb2.Field, new_name: str) -> schema_pb2.Field: