Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Modernize type hints #31755

Merged
merged 29 commits into from
Jul 10, 2024
Merged
Show file tree
Hide file tree
Changes from 1 commit
Commits
Show all changes
29 commits
Select commit Hold shift + click to select a range
43b237e
Modernize python type hints for apache_beam
robertwb Jul 2, 2024
cd495e9
Modernize python type hints for apache_beam/coders
robertwb Jul 2, 2024
6143cd0
Modernize python type hints for apache_beam/dataframe
robertwb Jul 2, 2024
d75916b
Modernize python type hints for apache_beam/examples/cookbook
robertwb Jul 2, 2024
c842252
Modernize python type hints for apache_beam/internal
robertwb Jul 2, 2024
33bde4d
Modernize python type hints for apache_beam/internal/metrics
robertwb Jul 2, 2024
d73982a
Modernize python type hints for apache_beam/io
robertwb Jul 2, 2024
8f6f24d
Modernize python type hints for apache_beam/io/azure
robertwb Jul 2, 2024
b8029e9
Modernize python type hints for apache_beam/io/flink
robertwb Jul 2, 2024
f49a29a
Modernize python type hints for apache_beam/io/gcp
robertwb Jul 2, 2024
5d5a09b
Modernize python type hints for apache_beam/metrics
robertwb Jul 2, 2024
fbafe8d
Modernize python type hints for apache_beam/ml/gcp
robertwb Jul 2, 2024
0eab298
Modernize python type hints for apache_beam/options
robertwb Jul 2, 2024
842b8ec
Modernize python type hints for apache_beam/runners
robertwb Jul 2, 2024
0763d7e
Modernize python type hints for apache_beam/runners/direct
robertwb Jul 2, 2024
3cf0c55
Modernize python type hints for apache_beam/runners/interactive
robertwb Jul 2, 2024
b416982
Modernize python type hints for apache_beam/runners/job
robertwb Jul 2, 2024
8b540eb
Modernize python type hints for apache_beam/runners/portability
robertwb Jul 2, 2024
8fdbe88
Modernize python type hints for apache_beam/runners/worker
robertwb Jul 2, 2024
77d8189
Modernize python type hints for apache_beam/testing/benchmarks
robertwb Jul 2, 2024
acfd72c
Modernize python type hints for apache_beam/testing/load_tests
robertwb Jul 2, 2024
4402f2d
Modernize python type hints for apache_beam/transforms
robertwb Jul 2, 2024
79d4ffd
Modernize python type hints for apache_beam/typehints
robertwb Jul 2, 2024
f2ffa5e
Modernize python type hints for apache_beam/utils
robertwb Jul 2, 2024
abdb1b7
Fix circular references, mypy complaints.
robertwb Jul 2, 2024
d4de077
Fix bad type declarations.
robertwb Jul 3, 2024
14c52d6
Fix bad typing in PubSub tests.
robertwb Jul 3, 2024
64e6194
Preserve existing linter comments.
robertwb Jul 3, 2024
a0ba8de
isort
robertwb Jul 8, 2024
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion sdks/python/apache_beam/coders/row_coder.py
Original file line number Diff line number Diff line change
Expand Up @@ -117,7 +117,7 @@ def from_type_hint(cls, type_hint, registry):
return cls(schema)

@staticmethod
def from_payload(payload: bytes) -> RowCoder:
def from_payload(payload: bytes) -> 'RowCoder':
return RowCoder(proto_utils.parse_Bytes(payload, schema_pb2.Schema))

def __reduce__(self):
Expand Down
6 changes: 1 addition & 5 deletions sdks/python/apache_beam/dataframe/convert.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,10 +17,10 @@
import inspect
import warnings
import weakref
from typing import TYPE_CHECKING
from typing import Any
from typing import Dict
from typing import Iterable
from typing import Optional
from typing import Tuple
from typing import Union

Expand All @@ -35,10 +35,6 @@
from apache_beam.dataframe.schemas import generate_proxy
from apache_beam.typehints.pandas_type_compatibility import dtype_to_fieldtype

if TYPE_CHECKING:
# pylint: disable=ungrouped-imports
from typing import Optional


# TODO: Or should this be called as_dataframe?
def to_dataframe(
Expand Down
2 changes: 1 addition & 1 deletion sdks/python/apache_beam/dataframe/partitionings.py
Original file line number Diff line number Diff line change
Expand Up @@ -32,7 +32,7 @@ class Partitioning(object):
def __repr__(self):
return self.__class__.__name__

def is_subpartitioning_of(self, other: Partitioning) -> bool:
def is_subpartitioning_of(self, other: 'Partitioning') -> bool:
"""Returns whether self is a sub-partition of other.

    Specifically, returns whether something partitioned by self is necessarily
Expand Down
12 changes: 6 additions & 6 deletions sdks/python/apache_beam/internal/metrics/cells.py
Original file line number Diff line number Diff line change
Expand Up @@ -55,15 +55,15 @@ def __init__(self, bucket_type):
def reset(self):
self.data = HistogramAggregator(self._bucket_type).identity_element()

def combine(self, other: HistogramCell) -> HistogramCell:
def combine(self, other: 'HistogramCell') -> 'HistogramCell':
result = HistogramCell(self._bucket_type)
result.data = self.data.combine(other.data)
return result

def update(self, value):
self.data.histogram.record(value)

def get_cumulative(self) -> HistogramData:
def get_cumulative(self) -> 'HistogramData':
return self.data.get_cumulative()

def to_runner_api_monitoring_info(self, name, transform_id):
Expand All @@ -90,7 +90,7 @@ def __hash__(self):


class HistogramResult(object):
def __init__(self, data: HistogramData) -> None:
def __init__(self, data: 'HistogramData') -> None:
self.data = data

def __eq__(self, other):
Expand Down Expand Up @@ -139,10 +139,10 @@ def __hash__(self):
def __repr__(self):
return 'HistogramData({})'.format(self.histogram.get_percentile_info())

def get_cumulative(self) -> HistogramData:
def get_cumulative(self) -> 'HistogramData':
return HistogramData(self.histogram)

def combine(self, other: Optional[HistogramData]) -> HistogramData:
def combine(self, other: Optional['HistogramData']) -> 'HistogramData':
if other is None:
return self

Expand All @@ -156,7 +156,7 @@ class HistogramAggregator(MetricAggregator):

Values aggregated should be ``HistogramData`` objects.
"""
def __init__(self, bucket_type: BucketType) -> None:
def __init__(self, bucket_type: 'BucketType') -> None:
self._bucket_type = bucket_type

def identity_element(self) -> HistogramData:
Expand Down
20 changes: 10 additions & 10 deletions sdks/python/apache_beam/internal/metrics/metric.py
Original file line number Diff line number Diff line change
Expand Up @@ -86,8 +86,8 @@ def counter(
def histogram(
namespace: Union[Type, str],
name: str,
bucket_type: BucketType,
logger: Optional[MetricLogger] = None) -> Metrics.DelegatingHistogram:
bucket_type: 'BucketType',
logger: Optional['MetricLogger'] = None) -> 'Metrics.DelegatingHistogram':
"""Obtains or creates a Histogram metric.

Args:
Expand All @@ -109,8 +109,8 @@ class DelegatingHistogram(Histogram):
def __init__(
self,
metric_name: MetricName,
bucket_type: BucketType,
logger: Optional[MetricLogger]) -> None:
bucket_type: 'BucketType',
logger: Optional['MetricLogger']) -> None:
super().__init__(metric_name)
self.metric_name = metric_name
self.cell_type = HistogramCellFactory(bucket_type)
Expand All @@ -126,23 +126,23 @@ def update(self, value: object) -> None:
class MetricLogger(object):
"""Simple object to locally aggregate and log metrics."""
def __init__(self) -> None:
self._metric: Dict[MetricName, MetricCell] = {}
self._metric: Dict[MetricName, 'MetricCell'] = {}
self._lock = threading.Lock()
self._last_logging_millis = int(time.time() * 1000)
self.minimum_logging_frequency_msec = 180000

def update(
self,
cell_type: Union[Type[MetricCell], MetricCellFactory],
cell_type: Union[Type['MetricCell'], 'MetricCellFactory'],
metric_name: MetricName,
value: object) -> None:
cell = self._get_metric_cell(cell_type, metric_name)
cell.update(value)

def _get_metric_cell(
self,
cell_type: Union[Type[MetricCell], MetricCellFactory],
metric_name: MetricName) -> MetricCell:
cell_type: Union[Type['MetricCell'], 'MetricCellFactory'],
metric_name: MetricName) -> 'MetricCell':
with self._lock:
if metric_name not in self._metric:
self._metric[metric_name] = cell_type()
Expand Down Expand Up @@ -187,7 +187,7 @@ def __init__(
self.base_labels = base_labels if base_labels else {}
self.request_count_urn = request_count_urn

def call(self, status: Union[int, str, HttpError]) -> None:
def call(self, status: Union[int, str, 'HttpError']) -> None:
"""Record the status of the call into appropriate metrics."""
canonical_status = self.convert_to_canonical_status_string(status)
additional_labels = {monitoring_infos.STATUS_LABEL: canonical_status}
Expand All @@ -200,7 +200,7 @@ def call(self, status: Union[int, str, HttpError]) -> None:
request_counter.inc()

def convert_to_canonical_status_string(
self, status: Union[int, str, HttpError]) -> str:
self, status: Union[int, str, 'HttpError']) -> str:
"""Converts a status to a canonical GCP status cdoe string."""
http_status_code = None
if isinstance(status, int):
Expand Down
3 changes: 1 addition & 2 deletions sdks/python/apache_beam/internal/module_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,6 @@

import re
import sys
from typing import Type


class TopClass(object):
Expand Down Expand Up @@ -64,7 +63,7 @@ def get(self):
class RecursiveClass(object):
"""A class that contains a reference to itself."""

SELF_TYPE: Type[RecursiveClass] = None
SELF_TYPE = None

def __init__(self, datum):
self.datum = 'RecursiveClass:%s' % datum
Expand Down
8 changes: 2 additions & 6 deletions sdks/python/apache_beam/io/azure/blobstoragefilesystem.py
Original file line number Diff line number Diff line change
Expand Up @@ -150,9 +150,7 @@ def create(
self,
path,
mime_type='application/octet-stream',
compression_type=CompressionTypes.AUTO) -> BinaryIO:
# noqa: F821

compression_type=CompressionTypes.AUTO):
"""Returns a write channel for the given file path.

Args:
Expand All @@ -168,9 +166,7 @@ def open(
self,
path,
mime_type='application/octet-stream',
compression_type=CompressionTypes.AUTO) -> BinaryIO:
# noqa: F821

compression_type=CompressionTypes.AUTO):
"""Returns a read channel for the given file path.

Args:
Expand Down
25 changes: 14 additions & 11 deletions sdks/python/apache_beam/io/gcp/bigquery_avro_tools.py
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,9 @@
NOTHING IN THIS FILE HAS BACKWARDS COMPATIBILITY GUARANTEES.
"""

from typing import Any
from typing import Dict

# BigQuery types as listed in
# https://cloud.google.com/bigquery/docs/reference/standard-sql/data-types
# with aliases (RECORD, BOOLEAN, FLOAT, INTEGER) as defined in
Expand Down Expand Up @@ -63,20 +66,20 @@


def get_record_schema_from_dict_table_schema(
schema_name: Text,
table_schema: Dict[Text, Any],
namespace: Text = "apache_beam.io.gcp.bigquery") -> Dict[Text, Any]:
schema_name: str,
table_schema: Dict[str, Any],
namespace: str = "apache_beam.io.gcp.bigquery") -> Dict[str, Any]:
# noqa: F821

"""Convert a table schema into an Avro schema.

Args:
schema_name (Text): The name of the record.
table_schema (Dict[Text, Any]): A BigQuery table schema in dict form.
namespace (Text): The namespace of the Avro schema.
schema_name (str): The name of the record.
table_schema (Dict[str, Any]): A BigQuery table schema in dict form.
namespace (str): The namespace of the Avro schema.

Returns:
Dict[Text, Any]: The schema as an Avro RecordSchema.
Dict[str, Any]: The schema as an Avro RecordSchema.
"""
avro_fields = [
table_field_to_avro_field(field, ".".join((namespace, schema_name)))
Expand All @@ -92,17 +95,17 @@ def get_record_schema_from_dict_table_schema(
}


def table_field_to_avro_field(table_field: Dict[Text, Any],
namespace: str) -> Dict[Text, Any]:
def table_field_to_avro_field(table_field: Dict[str, Any],
namespace: str) -> Dict[str, Any]:
# noqa: F821

"""Convert a BigQuery field to an avro field.

Args:
table_field (Dict[Text, Any]): A BigQuery field in dict form.
table_field (Dict[str, Any]): A BigQuery field in dict form.

Returns:
Dict[Text, Any]: An equivalent Avro field in dict form.
Dict[str, Any]: An equivalent Avro field in dict form.
"""
assert "type" in table_field, \
"Unable to get type for table field {}".format(table_field)
Expand Down
2 changes: 1 addition & 1 deletion sdks/python/apache_beam/io/gcp/bigquery_schema_tools.py
Original file line number Diff line number Diff line change
Expand Up @@ -54,7 +54,7 @@


def generate_user_type_from_bq_schema(
the_table_schema, selected_fields: bigquery.TableSchema = None) -> type:
the_table_schema, selected_fields: 'bigquery.TableSchema' = None) -> type:
"""Convert a schema of type TableSchema into a pcollection element.
Args:
the_table_schema: A BQ schema of type TableSchema
Expand Down
9 changes: 4 additions & 5 deletions sdks/python/apache_beam/io/gcp/datastore/v1new/types.py
Original file line number Diff line number Diff line change
Expand Up @@ -25,7 +25,6 @@
from typing import Iterable
from typing import List
from typing import Optional
from typing import Text
from typing import Union

from google.cloud.datastore import entity
Expand Down Expand Up @@ -155,10 +154,10 @@ def __repr__(self):
class Key(object):
def __init__(
self,
path_elements: List[Union[Text, int]],
parent: Optional[Key] = None,
project: Optional[Text] = None,
namespace: Optional[Text] = None):
path_elements: List[Union[str, int]],
parent: Optional['Key'] = None,
project: Optional[str] = None,
namespace: Optional[str] = None):
"""
Represents a Datastore key.

Expand Down
4 changes: 2 additions & 2 deletions sdks/python/apache_beam/io/gcp/pubsub.py
Original file line number Diff line number Diff line change
Expand Up @@ -110,7 +110,7 @@ def __repr__(self):
return 'PubsubMessage(%s, %s)' % (self.data, self.attributes)

@staticmethod
def _from_proto_str(proto_msg: bytes) -> PubsubMessage:
def _from_proto_str(proto_msg: bytes) -> 'PubsubMessage':
"""Construct from serialized form of ``PubsubMessage``.

Args:
Expand Down Expand Up @@ -183,7 +183,7 @@ def _to_proto_str(self, for_publish=False):
return serialized

@staticmethod
def _from_message(msg: Any) -> PubsubMessage:
def _from_message(msg: Any) -> 'PubsubMessage':
"""Construct from ``google.cloud.pubsub_v1.subscriber.message.Message``.

https://googleapis.github.io/google-cloud-python/latest/pubsub/subscriber/api/message.html
Expand Down
6 changes: 3 additions & 3 deletions sdks/python/apache_beam/io/iobase.py
Original file line number Diff line number Diff line change
Expand Up @@ -182,7 +182,7 @@ def get_range_tracker(
self,
start_position: Optional[Any],
stop_position: Optional[Any],
) -> RangeTracker:
) -> 'RangeTracker':
"""Returns a RangeTracker for a given position range.

Framework may invoke ``read()`` method with the RangeTracker object returned
Expand Down Expand Up @@ -1281,7 +1281,7 @@ def current_restriction(self):
"""
raise NotImplementedError

def current_progress(self) -> RestrictionProgress:
def current_progress(self) -> 'RestrictionProgress':
"""Returns a RestrictionProgress object representing the current progress.

This API is recommended to be implemented. The runner can do a better job
Expand Down Expand Up @@ -1471,7 +1471,7 @@ def fraction_remaining(self) -> float:
else:
return float(self._remaining) / self.total_work

def with_completed(self, completed: int) -> RestrictionProgress:
def with_completed(self, completed: int) -> 'RestrictionProgress':
return RestrictionProgress(
fraction=self._fraction, remaining=self._remaining, completed=completed)

Expand Down
2 changes: 1 addition & 1 deletion sdks/python/apache_beam/io/restriction_trackers.py
Original file line number Diff line number Diff line change
Expand Up @@ -62,7 +62,7 @@ def split(self, desired_num_offsets_per_split, min_num_offsets_per_split=1):
yield OffsetRange(current_split_start, current_split_stop)
current_split_start = current_split_stop

def split_at(self, split_pos) -> Tuple[OffsetRange, OffsetRange]:
def split_at(self, split_pos) -> Tuple['OffsetRange', 'OffsetRange']:
return OffsetRange(self.start, split_pos), OffsetRange(split_pos, self.stop)

def new_tracker(self):
Expand Down
Loading
Loading