Skip to content

Commit

Permalink
Merge pull request #29687 [YAML] Better IO documentation.
Browse files Browse the repository at this point in the history
  • Loading branch information
robertwb authored Dec 20, 2023
2 parents fa11c0f + 47ccb8a commit 192f765
Show file tree
Hide file tree
Showing 6 changed files with 41 additions and 19 deletions.
16 changes: 13 additions & 3 deletions sdks/python/apache_beam/io/avroio.py
Original file line number Diff line number Diff line change
Expand Up @@ -75,8 +75,15 @@


class ReadFromAvro(PTransform):
"""A :class:`~apache_beam.transforms.ptransform.PTransform` for reading avro
files."""
"""A `PTransform` for reading records from avro files.
Each record of the resulting PCollection will contain
a single record read from a source. Records that are of simple types will be
mapped to beam Rows with a single `record` field containing the record's
value. Records that are of Avro type ``RECORD`` will be mapped to Beam rows
that comply with the schema contained in the Avro file that contains those
records.
"""
def __init__(
self,
file_pattern=None,
Expand Down Expand Up @@ -355,7 +362,10 @@ def split_points_unclaimed(stop_position):


class WriteToAvro(beam.transforms.PTransform):
"""A ``PTransform`` for writing avro files."""
"""A ``PTransform`` for writing avro files.
If the input has a schema, a corresponding avro schema will be automatically
generated and used to write the output records."""
def __init__(
self,
file_path_prefix,
Expand Down
7 changes: 1 addition & 6 deletions sdks/python/apache_beam/io/parquetio.py
Original file line number Diff line number Diff line change
Expand Up @@ -231,9 +231,7 @@ def display_data(self):


class ReadFromParquet(PTransform):
"""A :class:`~apache_beam.transforms.ptransform.PTransform` for reading
Parquet files as a `PCollection` of dictionaries. This `PTransform` is
currently experimental. No backward-compatibility guarantees."""
"""A `PTransform` for reading Parquet files."""
def __init__(
self,
file_pattern=None,
Expand Down Expand Up @@ -465,9 +463,6 @@ def split_points_unclaimed(stop_position):

class WriteToParquet(PTransform):
"""A ``PTransform`` for writing parquet files.
This ``PTransform`` is currently experimental. No backward-compatibility
guarantees.
"""
def __init__(
self,
Expand Down
2 changes: 1 addition & 1 deletion sdks/python/apache_beam/yaml/generate_yaml_docs.py
Original file line number Diff line number Diff line change
Expand Up @@ -116,7 +116,7 @@ def lines():
yield ''.join([
f'**{f.name}** `{pretty_type(f.type)}`',
maybe_optional(f.type),
': ' + f.description if f.description else '',
indent(': ' + f.description if f.description else '', 2),
maybe_row_parameters(f.type),
])

Expand Down
8 changes: 0 additions & 8 deletions sdks/python/apache_beam/yaml/standard_io.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -239,13 +239,5 @@
transforms:
'ReadFromJdbc': 'beam:schematransform:org.apache.beam:jdbc_read:v1'
'WriteToJdbc': 'beam:schematransform:org.apache.beam:jdbc_write:v1'
'ReadFromMySql': 'beam:schematransform:org.apache.beam:jdbc_read:v1'
'WriteToMySql': 'beam:schematransform:org.apache.beam:jdbc_write:v1'
'ReadFromPostgres': 'beam:schematransform:org.apache.beam:jdbc_read:v1'
'WriteToPostgres': 'beam:schematransform:org.apache.beam:jdbc_write:v1'
'ReadFromOracle': 'beam:schematransform:org.apache.beam:jdbc_read:v1'
'WriteToOracle': 'beam:schematransform:org.apache.beam:jdbc_write:v1'
'ReadFromSqlServer': 'beam:schematransform:org.apache.beam:jdbc_read:v1'
'WriteToSqlServer': 'beam:schematransform:org.apache.beam:jdbc_write:v1'
config:
gradle_target: 'sdks:java:extensions:schemaio-expansion-service:shadowJar'
24 changes: 24 additions & 0 deletions sdks/python/apache_beam/yaml/yaml_io.py
Original file line number Diff line number Diff line change
Expand Up @@ -52,11 +52,29 @@
def read_from_text(path: str):
# TODO(yaml): Consider passing the filename and offset, possibly even
# by default.

"""Reads lines from a text file or set of text files.

The resulting PCollection consists of rows with a single string field named
"line."
Args:
path (str): The file path to read from. The path can contain glob
characters such as ``*`` and ``?``.
"""
return beam_io.ReadFromText(path) | beam.Map(lambda s: beam.Row(line=s))


@beam.ptransform_fn
def write_to_text(pcoll, path: str):
"""Writes a PCollection to a (set of) text file(s).

The input must be a PCollection whose schema has exactly one field.
Args:
path (str): The file path to write to. The files written will
begin with this prefix, followed by a shard identifier.
"""
try:
field_names = [
name for name,
Expand All @@ -76,6 +94,11 @@ def write_to_text(pcoll, path: str):

def read_from_bigquery(
query=None, table=None, row_restriction=None, fields=None):
"""Reads data from BigQuery.
Exactly one of table or query must be set.
If query is set, neither row_restriction nor fields should be set.
"""
if query is None:
assert table is not None
else:
Expand All @@ -95,6 +118,7 @@ def write_to_bigquery(
create_disposition=BigQueryDisposition.CREATE_IF_NEEDED,
write_disposition=BigQueryDisposition.WRITE_APPEND,
error_handling=None):
"""Writes data to a BigQuery table."""
class WriteToBigQueryHandlingErrors(beam.PTransform):
def default_label(self):
return 'WriteToBigQuery'
Expand Down
3 changes: 2 additions & 1 deletion sdks/python/apache_beam/yaml/yaml_provider.py
Original file line number Diff line number Diff line change
Expand Up @@ -451,7 +451,8 @@ def empty_if_none(s):

docs = self.get_docs(typ)
return (
empty_if_none(docs.short_description) + '\n\n' +
empty_if_none(docs.short_description) +
('\n\n' if docs.blank_after_short_description else '\n') +
empty_if_none(docs.long_description)).strip() or None

def get_docs(self, typ):
Expand Down

0 comments on commit 192f765

Please sign in to comment.