Skip to content

Commit

Permalink
[YAML] HTML generation of yaml transform catalogue. (apache#30707)
Browse files (browse the repository at this point in the history)
  • Loading branch information
robertwb authored and hjtran committed Apr 4, 2024
1 parent 94e1027 commit fcfe80f
Show file tree
Hide file tree
Showing 2 changed files with 126 additions and 46 deletions.
160 changes: 117 additions & 43 deletions sdks/python/apache_beam/yaml/generate_yaml_docs.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,7 @@
#

import argparse
import contextlib
import io
import re

import yaml
Expand Down Expand Up @@ -154,21 +154,25 @@ def transform_docs(t, providers):
return '\n'.join([
f'## {t}',
'',
longest(lambda p: p.description(t), providers),
longest(lambda p: p.description(t),
providers).replace('::\n', '\n\n :::yaml\n'),
'',
'### Configuration',
'',
longest(lambda p: config_docs(p.config_schema(t)), providers),
'',
'### Usage',
'',
' :::yaml',
'',
indent(longest(lambda p: pretty_example(p, t), providers), 4),
])


def main():
parser = argparse.ArgumentParser()
parser.add_argument('--markdown_file')
parser.add_argument('--html_file')
parser.add_argument('--schema_file')
parser.add_argument('--include', default='.*')
parser.add_argument(
Expand All @@ -179,52 +183,122 @@ def main():

with subprocess_server.SubprocessServer.cache_subprocesses():
json_config_schemas = []
with contextlib.ExitStack() as stack:
if options.markdown_file:
markdown_out = stack.enter_context(open(options.markdown_file, 'w'))
providers = yaml_provider.standard_providers()
for transform in sorted(providers.keys(), key=io_grouping_key):
if include(transform) and not exclude(transform):
print(transform)
if options.markdown_file:
markdown_out.write(transform_docs(transform, providers[transform]))
markdown_out.write('\n\n')
if options.schema_file:
schema = providers[transform][0].config_schema(transform)
if schema:
json_config_schemas.append({
'if': {
'properties': {
'type': {
'const': transform
}
}
},
'then': {
'properties': {
'config': {
'type': 'object',
'properties': {
'__line__': {
'type': 'integer'
},
'__uuid__': {},
**{
f.name: json_utils.beam_type_to_json_type(
f.type)
for f in schema.fields
}
},
'additionalProperties': False,
}
}
}
})
markdown_out = io.StringIO()
providers = yaml_provider.standard_providers()
for transform in sorted(providers.keys(), key=io_grouping_key):
if include(transform) and not exclude(transform):
print(transform)
if options.markdown_file:
markdown_out.write(transform_docs(transform, providers[transform]))
markdown_out.write('\n\n')
if options.schema_file:
schema = providers[transform][0].config_schema(transform)
if schema:
json_config_schemas.append({
'if': {
'properties': {
'type': {
'const': transform
}
}
},
'then': {
'properties': {
'config': {
'type': 'object',
'properties': {
'__line__': {
'type': 'integer'
},
'__uuid__': {},
**{
f.name: json_utils.beam_type_to_json_type(
f.type)
for f in schema.fields
}
},
'additionalProperties': False,
}
}
}
})

if options.schema_file:
with open(options.schema_file, 'w') as fout:
yaml.dump(json_config_schemas, fout, sort_keys=False)

if options.markdown_file:
with open(options.markdown_file, 'w') as fout:
fout.write(markdown_out.getvalue())

if options.html_file:
import markdown
import markdown.extensions.toc
import pygments.formatters

title = 'Beam YAML Transform Index'
md = markdown.Markdown(
extensions=[
markdown.extensions.toc.TocExtension(toc_depth=2),
'codehilite',
])
html = md.convert(markdown_out.getvalue())
pygments_style = pygments.formatters.HtmlFormatter().get_style_defs(
'.codehilite')
extra_style = '''
.nav {
height: 100%;
width: 12em;
position: fixed;
top: 0;
left: 0;
overflow-x: hidden;
}
.nav a {
color: #333;
padding: .2em;
display: block;
text-decoration: none;
}
.nav a:hover {
color: #888;
}
.nav li {
list-style-type: none;
margin: 0;
padding: 0;
}
.content {
margin-left: 12em;
}
h2 {
margin-top: 2em;
}
'''

with open(options.html_file, 'w') as fout:
fout.write(
f'''
<html>
<head>
<title>{title}</title>
<style>
{pygments_style}
{extra_style}
</style>
</head>
<body>
<div class="nav">
{md.toc}
</div>
<div class="content">
<h1>{title}</h1>
{html}
</div>
</body>
</html>
''')


if __name__ == '__main__':
main()
12 changes: 9 additions & 3 deletions sdks/python/apache_beam/yaml/yaml_provider.py
Original file line number Diff line number Diff line change
Expand Up @@ -1039,9 +1039,15 @@ def config_schema(self, type):
missing = set(self._mappings[type].values()) - set(
underlying_schema_fields.keys())
if missing:
raise ValueError(
f"Mapping destinations {missing} for {type} are not in the "
f"underlying config schema {list(underlying_schema_fields.keys())}")
if 'kwargs' in underlying_schema_fields.keys():
# The underlying schema has a 'kwargs' field, so these destinations are likely passed via that keyword-argument dict rather than actually missing.
for field_name in missing:
underlying_schema_fields[field_name] = schema_pb2.Field(
name=field_name, type=typing_to_runner_api(Any))
else:
raise ValueError(
f"Mapping destinations {missing} for {type} are not in the "
f"underlying config schema {list(underlying_schema_fields.keys())}")

def with_name(
original: schema_pb2.Field, new_name: str) -> schema_pb2.Field:
Expand Down

0 comments on commit fcfe80f

Please sign in to comment.