diff --git a/sdks/python/apache_beam/ml/inference/tensorrt_inference.py b/sdks/python/apache_beam/ml/inference/tensorrt_inference.py index b38947b494c2..9563aa05232a 100644 --- a/sdks/python/apache_beam/ml/inference/tensorrt_inference.py +++ b/sdks/python/apache_beam/ml/inference/tensorrt_inference.py @@ -125,7 +125,7 @@ def __init__(self, engine: trt.ICudaEngine): # TODO(https://github.com/NVIDIA/TensorRT/issues/2557): # Clean up when fixed upstream. try: - _ = np.bool # type: ignore + _ = np.bool except AttributeError: # numpy >= 1.24.0 np.bool = np.bool_ # type: ignore @@ -258,7 +258,7 @@ def __init__( model_copies: The exact number of models that you would like loaded onto your machine. This can be useful if you exactly know your CPU or GPU capacity and want to maximize resource utilization. - max_batch_duration_secs: the maximum amount of time to buffer + max_batch_duration_secs: the maximum amount of time to buffer a batch before emitting; used in streaming contexts. kwargs: Additional arguments like 'engine_path' and 'onnx_path' are currently supported. 'env_vars' can be used to set environment variables