From 1885251330952546d1a74d31d7a0324445ac2335 Mon Sep 17 00:00:00 2001 From: Robert Bradshaw Date: Thu, 19 Oct 2023 13:38:48 -0700 Subject: [PATCH 1/4] Add jsonschema dependency. --- sdks/python/container/py310/base_image_requirements.txt | 2 ++ sdks/python/container/py311/base_image_requirements.txt | 2 ++ sdks/python/container/py38/base_image_requirements.txt | 2 ++ sdks/python/container/py39/base_image_requirements.txt | 2 ++ sdks/python/setup.py | 1 + 5 files changed, 9 insertions(+) diff --git a/sdks/python/container/py310/base_image_requirements.txt b/sdks/python/container/py310/base_image_requirements.txt index 548c56371706..07e03a539bb0 100644 --- a/sdks/python/container/py310/base_image_requirements.txt +++ b/sdks/python/container/py310/base_image_requirements.txt @@ -81,6 +81,8 @@ idna==3.4 iniconfig==2.0.0 joblib==1.3.2 Js2Py==0.74 +jsonschema==4.19.1 +jsonschema-specifications==2023.7.1 mmh3==4.0.1 mock==5.1.0 nltk==3.8.1 diff --git a/sdks/python/container/py311/base_image_requirements.txt b/sdks/python/container/py311/base_image_requirements.txt index 2e5d834926bd..18f55e5d6927 100644 --- a/sdks/python/container/py311/base_image_requirements.txt +++ b/sdks/python/container/py311/base_image_requirements.txt @@ -78,6 +78,8 @@ idna==3.4 iniconfig==2.0.0 joblib==1.3.2 Js2Py==0.74 +jsonschema==4.19.1 +jsonschema-specifications==2023.7.1 mmh3==4.0.1 mock==5.1.0 nltk==3.8.1 diff --git a/sdks/python/container/py38/base_image_requirements.txt b/sdks/python/container/py38/base_image_requirements.txt index ed5d35fc6457..e474ab69c64c 100644 --- a/sdks/python/container/py38/base_image_requirements.txt +++ b/sdks/python/container/py38/base_image_requirements.txt @@ -82,6 +82,8 @@ idna==3.4 iniconfig==2.0.0 joblib==1.3.2 Js2Py==0.74 +jsonschema==4.19.1 +jsonschema-specifications==2023.7.1 mmh3==4.0.1 mock==5.1.0 nltk==3.8.1 diff --git a/sdks/python/container/py39/base_image_requirements.txt b/sdks/python/container/py39/base_image_requirements.txt index ff6ba0945e14..5d92e9601071 100644 --- a/sdks/python/container/py39/base_image_requirements.txt +++ b/sdks/python/container/py39/base_image_requirements.txt @@ -81,6 +81,8 @@ idna==3.4 iniconfig==2.0.0 joblib==1.3.2 Js2Py==0.74 +jsonschema==4.19.1 +jsonschema-specifications==2023.7.1 mmh3==4.0.1 mock==5.1.0 nltk==3.8.1 diff --git a/sdks/python/setup.py b/sdks/python/setup.py index 4a05544526fc..32b623c0b40f 100644 --- a/sdks/python/setup.py +++ b/sdks/python/setup.py @@ -261,6 +261,7 @@ def get_portability_package_data(): 'hdfs>=2.1.0,<3.0.0', 'httplib2>=0.8,<0.23.0', 'js2py>=0.74,<1', + 'jsonschema>=4.0.0,<5.0.0', # numpy can have breaking changes in minor versions. # Use a strict upper bound. 'numpy>=1.14.3,<1.25.0', # Update pyproject.toml as well. From b402fafd44f08db06700b33b8414f4bcb0b6a00a Mon Sep 17 00:00:00 2001 From: Robert Bradshaw Date: Thu, 19 Oct 2023 13:46:02 -0700 Subject: [PATCH 2/4] Validate incoming JSON data from pubsub. As well as good practice, not doing so may result in much more obscure errors (e.g. during encoding) downstream. --- sdks/python/apache_beam/yaml/json_utils.py | 39 +++++++++++++++++++- sdks/python/apache_beam/yaml/yaml_io.py | 10 ++++- sdks/python/apache_beam/yaml/yaml_io_test.py | 32 ++++++++++++++++ 3 files changed, 77 insertions(+), 4 deletions(-) diff --git a/sdks/python/apache_beam/yaml/json_utils.py b/sdks/python/apache_beam/yaml/json_utils.py index e2cb03dc96a0..5b0f5d74fa77 100644 --- a/sdks/python/apache_beam/yaml/json_utils.py +++ b/sdks/python/apache_beam/yaml/json_utils.py @@ -24,6 +24,9 @@ from typing import Any from typing import Callable from typing import Dict +from typing import Optional + +import jsonschema import apache_beam as beam from apache_beam.portability.api import schema_pb2 @@ -131,15 +134,47 @@ def json_to_row(beam_type: schema_pb2.FieldType) -> Callable[[Any], Any]: raise ValueError(f"Unrecognized type_info: {type_info!r}") -def json_parser(beam_schema: schema_pb2.Schema) -> Callable[[bytes], beam.Row]: +def json_parser( + beam_schema: schema_pb2.Schema, + json_schema: Optional[Dict[str, + Any]] = None) -> Callable[[bytes], beam.Row]: """Returns a callable converting Json strings to Beam rows of the given type. The input to the returned callable is expected to conform to the Json schema corresponding to this Beam type. """ + if json_schema is None: + validate_fn = None + else: + cls = jsonschema.validators.validator_for(json_schema) + cls.check_schema(json_schema) + validate_fn = _PicklableFromConstructor(lambda: jsonschema.validators.validator_for(json_schema)( + json_schema).validate) + to_row = json_to_row( schema_pb2.FieldType(row_type=schema_pb2.RowType(schema=beam_schema))) - return lambda s: to_row(json.loads(s)) + + def parse(s: bytes): + o = json.loads(s) + if validate_fn is not None: + validate_fn(o) + return to_row(o) + + return parse + + +class _PicklableFromConstructor: + def __init__(self, constructor): + self._constructor = constructor + self._value = None + + def __call__(self, o): + if self._value is None: + self._value = self._constructor() + return self._value(o) + + def __getstate__(self): + return {'_constructor': self._constructor, '_value': None} def row_to_json(beam_type: schema_pb2.FieldType) -> Callable[[Any], Any]: diff --git a/sdks/python/apache_beam/yaml/yaml_io.py b/sdks/python/apache_beam/yaml/yaml_io.py index b2bf150fa558..bf4009719b80 100644 --- a/sdks/python/apache_beam/yaml/yaml_io.py +++ b/sdks/python/apache_beam/yaml/yaml_io.py @@ -148,7 +148,7 @@ def _create_parser( lambda payload: beam.Row(payload=payload)) elif format == 'json': beam_schema = json_utils.json_schema_to_beam_schema(schema) - return beam_schema, json_utils.json_parser(beam_schema) + return beam_schema, json_utils.json_parser(beam_schema, schema) elif format == 'avro': beam_schema = avroio.avro_schema_to_beam_schema(schema) covert_to_row = avroio.avro_dict_to_beam_row(schema, beam_schema) @@ -215,6 +215,8 @@ def read_from_pubsub( - raw: Produces records with a single `payload` field whose contents are the raw bytes of the pubsub message. + - avro: Parses records with a given avro schema. + - json: Parses records with a given json schema. schema: Schema specification for the given format. attributes: List of attribute keys whose values will be flattened into the @@ -309,8 +311,12 @@ def write_to_pubsub( formats are - raw: Expects a message with a single field (excluding - attribute-related fields )whose contents are used as the raw bytes + attribute-related fields) whose contents are used as the raw bytes of the pubsub message. + - avro: Encodes records with a given avro schema, which may be inferred + from the input PCollection schema. + - json: Formats records with a given json schema, which may be inferred + from the input PCollection schema. schema: Schema specification for the given format. attributes: List of attribute keys whose values will be pulled out as diff --git a/sdks/python/apache_beam/yaml/yaml_io_test.py b/sdks/python/apache_beam/yaml/yaml_io_test.py index 7071860a7bf1..10aec8e8256d 100644 --- a/sdks/python/apache_beam/yaml/yaml_io_test.py +++ b/sdks/python/apache_beam/yaml/yaml_io_test.py @@ -307,6 +307,38 @@ def test_read_json_without_error_handling(self): some_int: {type: integer} ''') + def test_read_json_with_bad_schema(self): + with beam.Pipeline(options=beam.options.pipeline_options.PipelineOptions( + pickle_library='cloudpickle')) as p: + with mock.patch( + 'apache_beam.io.ReadFromPubSub', + FakeReadFromPubSub(topic='my_topic', + messages=[PubsubMessage('{"some_int": 123}', + attributes={}), + PubsubMessage('{"some_int": "NOT"}', + attributes={})])): + result = p | YamlTransform( + ''' + type: ReadFromPubSub + config: + topic: my_topic + format: json + schema: + type: object + properties: + some_int: {type: integer} + error_handling: + output: errors + ''') + assert_that( + result['good'], + equal_to([beam.Row(some_int=123)]), + label='CheckGood') + assert_that( + result['errors'] | beam.Map(lambda error: error.element), + equal_to(['{"some_int": "NOT"}']), + label='CheckErrors') + def test_simple_write(self): with beam.Pipeline(options=beam.options.pipeline_options.PipelineOptions( pickle_library='cloudpickle')) as p: From 2e676a369ad98d66103c5cbe450ec7796681a1e9 Mon Sep 17 00:00:00 2001 From: Robert Bradshaw Date: Thu, 19 Oct 2023 15:16:08 -0700 Subject: [PATCH 3/4] Regenrate python base deps files. --- .../py310/base_image_requirements.txt | 58 +++++++++--------- .../py311/base_image_requirements.txt | 56 +++++++++-------- .../py38/base_image_requirements.txt | 60 +++++++++++-------- .../py39/base_image_requirements.txt | 60 ++++++++++--------- 4 files changed, 128 insertions(+), 106 deletions(-) diff --git a/sdks/python/container/py310/base_image_requirements.txt b/sdks/python/container/py310/base_image_requirements.txt index 07e03a539bb0..d603ac16c0e2 100644 --- a/sdks/python/container/py310/base_image_requirements.txt +++ b/sdks/python/container/py310/base_image_requirements.txt @@ -24,10 +24,11 @@ attrs==23.1.0 beautifulsoup4==4.12.2 bs4==0.0.1 +build==1.0.3 cachetools==5.3.1 certifi==2023.7.22 -cffi==1.15.1 -charset-normalizer==3.2.0 +cffi==1.16.0 +charset-normalizer==3.3.0 click==8.1.7 cloudpickle==2.2.1 crcmod==1.7 @@ -40,17 +41,17 @@ docker==6.1.3 docopt==0.6.2 exceptiongroup==1.1.3 execnet==2.0.2 -fastavro==1.8.3 +fastavro==1.8.4 fasteners==0.19 freezegun==1.2.2 future==0.18.3 -google-api-core==2.11.1 -google-api-python-client==2.100.0 +google-api-core==2.12.0 +google-api-python-client==2.104.0 google-apitools==0.5.31 -google-auth==2.23.0 +google-auth==2.23.3 google-auth-httplib2==0.1.1 -google-cloud-aiplatform==1.33.1 -google-cloud-bigquery==3.11.4 +google-cloud-aiplatform==1.35.0 +google-cloud-bigquery==3.12.0 google-cloud-bigquery-storage==2.22.0 google-cloud-bigtable==2.21.0 google-cloud-core==2.3.3 @@ -63,20 +64,20 @@ google-cloud-pubsublite==1.8.3 google-cloud-recommendations-ai==0.10.5 google-cloud-resource-manager==1.10.4 google-cloud-spanner==3.40.1 -google-cloud-storage==2.11.0 +google-cloud-storage==2.12.0 google-cloud-videointelligence==2.11.4 -google-cloud-vision==3.4.4 +google-cloud-vision==3.4.5 google-crc32c==1.5.0 google-resumable-media==2.6.0 -googleapis-common-protos==1.60.0 -greenlet==2.0.2 +googleapis-common-protos==1.61.0 +greenlet==3.0.0 grpc-google-iam-v1==0.12.6 -grpcio==1.58.0 -grpcio-status==1.58.0 +grpcio==1.59.0 +grpcio-status==1.59.0 guppy3==3.1.3 -hdfs==2.7.2 +hdfs==2.7.3 httplib2==0.22.0 -hypothesis==6.87.0 +hypothesis==6.88.1 idna==3.4 iniconfig==2.0.0 joblib==1.3.2 @@ -90,15 +91,15 @@ nose==1.3.7 numpy==1.24.4 oauth2client==4.1.3 objsize==0.6.1 -orjson==3.9.7 +orjson==3.9.9 overrides==6.5.0 -packaging==23.1 +packaging==23.2 pandas==1.5.3 parameterized==0.9.0 pluggy==1.3.0 proto-plus==1.22.3 -protobuf==4.24.3 -psycopg2-binary==2.9.7 +protobuf==4.24.4 +psycopg2-binary==2.9.9 pyarrow==11.0.0 pyasn1==0.5.0 pyasn1-modules==0.3.0 @@ -109,20 +110,23 @@ pyjsparser==2.7.1 pymongo==4.5.0 PyMySQL==1.1.0 pyparsing==3.1.1 +pyproject_hooks==1.0.0 pytest==7.4.2 -pytest-timeout==2.1.0 +pytest-timeout==2.2.0 pytest-xdist==3.3.1 python-dateutil==2.8.2 python-snappy==0.6.1 pytz==2023.3.post1 PyYAML==6.0.1 -regex==2023.8.8 +referencing==0.30.2 +regex==2023.10.3 requests==2.31.0 requests-mock==1.11.0 +rpds-py==0.10.6 rsa==4.9 scikit-learn==1.3.1 -scipy==1.11.2 -Shapely==1.8.5.post1 +scipy==1.11.3 +shapely==2.0.2 six==1.16.0 sortedcontainers==2.4.0 soupsieve==2.5 @@ -134,9 +138,9 @@ threadpoolctl==3.2.0 tomli==2.0.1 tqdm==4.66.1 typing_extensions==4.8.0 -tzlocal==5.0.1 +tzlocal==5.1 uritemplate==4.1.1 -urllib3==1.26.17 -websocket-client==1.6.3 +urllib3==2.0.7 +websocket-client==1.6.4 wrapt==1.15.0 zstandard==0.21.0 diff --git a/sdks/python/container/py311/base_image_requirements.txt b/sdks/python/container/py311/base_image_requirements.txt index 18f55e5d6927..6f5d72aa35d3 100644 --- a/sdks/python/container/py311/base_image_requirements.txt +++ b/sdks/python/container/py311/base_image_requirements.txt @@ -24,10 +24,11 @@ attrs==23.1.0 beautifulsoup4==4.12.2 bs4==0.0.1 +build==1.0.3 cachetools==5.3.1 certifi==2023.7.22 -cffi==1.15.1 -charset-normalizer==3.2.0 +cffi==1.16.0 +charset-normalizer==3.3.0 click==8.1.7 cloudpickle==2.2.1 crcmod==1.7 @@ -39,16 +40,16 @@ dnspython==2.4.2 docker==6.1.3 docopt==0.6.2 execnet==2.0.2 -fastavro==1.8.3 +fastavro==1.8.4 fasteners==0.19 freezegun==1.2.2 future==0.18.3 -google-api-core==2.11.1 +google-api-core==2.12.0 google-apitools==0.5.31 -google-auth==2.23.0 +google-auth==2.23.3 google-auth-httplib2==0.1.1 -google-cloud-aiplatform==1.33.1 -google-cloud-bigquery==3.11.4 +google-cloud-aiplatform==1.35.0 +google-cloud-bigquery==3.12.0 google-cloud-bigquery-storage==2.22.0 google-cloud-bigtable==2.21.0 google-cloud-core==2.3.3 @@ -60,20 +61,20 @@ google-cloud-pubsublite==1.8.3 google-cloud-recommendations-ai==0.10.5 google-cloud-resource-manager==1.10.4 google-cloud-spanner==3.40.1 -google-cloud-storage==2.11.0 +google-cloud-storage==2.12.0 google-cloud-videointelligence==2.11.4 -google-cloud-vision==3.4.4 +google-cloud-vision==3.4.5 google-crc32c==1.5.0 google-resumable-media==2.6.0 -googleapis-common-protos==1.60.0 -greenlet==2.0.2 +googleapis-common-protos==1.61.0 +greenlet==3.0.0 grpc-google-iam-v1==0.12.6 -grpcio==1.58.0 -grpcio-status==1.58.0 +grpcio==1.59.0 +grpcio-status==1.59.0 guppy3==3.1.3 -hdfs==2.7.2 +hdfs==2.7.3 httplib2==0.22.0 -hypothesis==6.87.0 +hypothesis==6.88.1 idna==3.4 iniconfig==2.0.0 joblib==1.3.2 @@ -87,15 +88,15 @@ nose==1.3.7 numpy==1.24.4 oauth2client==4.1.3 objsize==0.6.1 -orjson==3.9.7 +orjson==3.9.9 overrides==6.5.0 -packaging==23.1 +packaging==23.2 pandas==1.5.3 parameterized==0.9.0 pluggy==1.3.0 proto-plus==1.22.3 -protobuf==4.24.3 -psycopg2-binary==2.9.7 +protobuf==4.24.4 +psycopg2-binary==2.9.9 pyarrow==11.0.0 pyasn1==0.5.0 pyasn1-modules==0.3.0 @@ -106,19 +107,22 @@ pyjsparser==2.7.1 pymongo==4.5.0 PyMySQL==1.1.0 pyparsing==3.1.1 +pyproject_hooks==1.0.0 pytest==7.4.2 -pytest-timeout==2.1.0 +pytest-timeout==2.2.0 pytest-xdist==3.3.1 python-dateutil==2.8.2 pytz==2023.3.post1 PyYAML==6.0.1 -regex==2023.8.8 +referencing==0.30.2 +regex==2023.10.3 requests==2.31.0 requests-mock==1.11.0 +rpds-py==0.10.6 rsa==4.9 scikit-learn==1.3.1 -scipy==1.11.2 -Shapely==1.8.5.post1 +scipy==1.11.3 +shapely==2.0.2 six==1.16.0 sortedcontainers==2.4.0 soupsieve==2.5 @@ -129,8 +133,8 @@ testcontainers==3.7.1 threadpoolctl==3.2.0 tqdm==4.66.1 typing_extensions==4.8.0 -tzlocal==5.0.1 -urllib3==1.26.16 -websocket-client==1.6.3 +tzlocal==5.1 +urllib3==2.0.7 +websocket-client==1.6.4 wrapt==1.15.0 zstandard==0.21.0 diff --git a/sdks/python/container/py38/base_image_requirements.txt b/sdks/python/container/py38/base_image_requirements.txt index e474ab69c64c..8e3a7ad4940f 100644 --- a/sdks/python/container/py38/base_image_requirements.txt +++ b/sdks/python/container/py38/base_image_requirements.txt @@ -25,10 +25,11 @@ attrs==23.1.0 backports.zoneinfo==0.2.1 beautifulsoup4==4.12.2 bs4==0.0.1 +build==1.0.3 cachetools==5.3.1 certifi==2023.7.22 -cffi==1.15.1 -charset-normalizer==3.2.0 +cffi==1.16.0 +charset-normalizer==3.3.0 click==8.1.7 cloudpickle==2.2.1 crcmod==1.7 @@ -41,17 +42,17 @@ docker==6.1.3 docopt==0.6.2 exceptiongroup==1.1.3 execnet==2.0.2 -fastavro==1.8.3 +fastavro==1.8.4 fasteners==0.19 freezegun==1.2.2 future==0.18.3 -google-api-core==2.11.1 -google-api-python-client==2.100.0 +google-api-core==2.12.0 +google-api-python-client==2.104.0 google-apitools==0.5.31 -google-auth==2.23.0 +google-auth==2.23.3 google-auth-httplib2==0.1.1 -google-cloud-aiplatform==1.33.1 -google-cloud-bigquery==3.11.4 +google-cloud-aiplatform==1.35.0 +google-cloud-bigquery==3.12.0 google-cloud-bigquery-storage==2.22.0 google-cloud-bigtable==2.21.0 google-cloud-core==2.3.3 @@ -64,21 +65,23 @@ google-cloud-pubsublite==1.8.3 google-cloud-recommendations-ai==0.10.5 google-cloud-resource-manager==1.10.4 google-cloud-spanner==3.40.1 -google-cloud-storage==2.11.0 +google-cloud-storage==2.12.0 google-cloud-videointelligence==2.11.4 -google-cloud-vision==3.4.4 +google-cloud-vision==3.4.5 google-crc32c==1.5.0 google-resumable-media==2.6.0 -googleapis-common-protos==1.60.0 -greenlet==2.0.2 +googleapis-common-protos==1.61.0 +greenlet==3.0.0 grpc-google-iam-v1==0.12.6 -grpcio==1.58.0 -grpcio-status==1.58.0 +grpcio==1.59.0 +grpcio-status==1.59.0 guppy3==3.1.3 -hdfs==2.7.2 +hdfs==2.7.3 httplib2==0.22.0 -hypothesis==6.87.0 +hypothesis==6.88.1 idna==3.4 +importlib-metadata==6.8.0 +importlib-resources==6.1.0 iniconfig==2.0.0 joblib==1.3.2 Js2Py==0.74 @@ -91,15 +94,16 @@ nose==1.3.7 numpy==1.24.4 oauth2client==4.1.3 objsize==0.6.1 -orjson==3.9.7 +orjson==3.9.9 overrides==6.5.0 -packaging==23.1 +packaging==23.2 pandas==1.5.3 parameterized==0.9.0 +pkgutil_resolve_name==1.3.10 pluggy==1.3.0 proto-plus==1.22.3 -protobuf==4.24.3 -psycopg2-binary==2.9.7 +protobuf==4.24.4 +psycopg2-binary==2.9.9 pyarrow==11.0.0 pyasn1==0.5.0 pyasn1-modules==0.3.0 @@ -110,20 +114,23 @@ pyjsparser==2.7.1 pymongo==4.5.0 PyMySQL==1.1.0 pyparsing==3.1.1 +pyproject_hooks==1.0.0 pytest==7.4.2 -pytest-timeout==2.1.0 +pytest-timeout==2.2.0 pytest-xdist==3.3.1 python-dateutil==2.8.2 python-snappy==0.6.1 pytz==2023.3.post1 PyYAML==6.0.1 -regex==2023.8.8 +referencing==0.30.2 +regex==2023.10.3 requests==2.31.0 requests-mock==1.11.0 +rpds-py==0.10.6 rsa==4.9 scikit-learn==1.3.1 scipy==1.10.1 -Shapely==1.8.5.post1 +shapely==2.0.2 six==1.16.0 sortedcontainers==2.4.0 soupsieve==2.5 @@ -135,9 +142,10 @@ threadpoolctl==3.2.0 tomli==2.0.1 tqdm==4.66.1 typing_extensions==4.8.0 -tzlocal==5.0.1 +tzlocal==5.1 uritemplate==4.1.1 -urllib3==1.26.17 -websocket-client==1.6.3 +urllib3==2.0.7 +websocket-client==1.6.4 wrapt==1.15.0 +zipp==3.17.0 zstandard==0.21.0 diff --git a/sdks/python/container/py39/base_image_requirements.txt b/sdks/python/container/py39/base_image_requirements.txt index 5d92e9601071..14e077b79223 100644 --- a/sdks/python/container/py39/base_image_requirements.txt +++ b/sdks/python/container/py39/base_image_requirements.txt @@ -24,10 +24,11 @@ attrs==23.1.0 beautifulsoup4==4.12.2 bs4==0.0.1 +build==1.0.3 cachetools==5.3.1 certifi==2023.7.22 -cffi==1.15.1 -charset-normalizer==3.2.0 +cffi==1.16.0 +charset-normalizer==3.3.0 click==8.1.7 cloudpickle==2.2.1 crcmod==1.7 @@ -40,17 +41,17 @@ docker==6.1.3 docopt==0.6.2 exceptiongroup==1.1.3 execnet==2.0.2 -fastavro==1.8.3 +fastavro==1.8.4 fasteners==0.19 freezegun==1.2.2 future==0.18.3 -google-api-core==2.11.1 -google-api-python-client==2.100.0 +google-api-core==2.12.0 +google-api-python-client==2.104.0 google-apitools==0.5.31 -google-auth==2.23.0 +google-auth==2.23.3 google-auth-httplib2==0.1.1 -google-cloud-aiplatform==1.33.1 -google-cloud-bigquery==3.11.4 +google-cloud-aiplatform==1.35.0 +google-cloud-bigquery==3.12.0 google-cloud-bigquery-storage==2.22.0 google-cloud-bigtable==2.21.0 google-cloud-core==2.3.3 @@ -63,21 +64,22 @@ google-cloud-pubsublite==1.8.3 google-cloud-recommendations-ai==0.10.5 google-cloud-resource-manager==1.10.4 google-cloud-spanner==3.40.1 -google-cloud-storage==2.11.0 +google-cloud-storage==2.12.0 google-cloud-videointelligence==2.11.4 -google-cloud-vision==3.4.4 +google-cloud-vision==3.4.5 google-crc32c==1.5.0 google-resumable-media==2.6.0 -googleapis-common-protos==1.60.0 -greenlet==2.0.2 +googleapis-common-protos==1.61.0 +greenlet==3.0.0 grpc-google-iam-v1==0.12.6 -grpcio==1.58.0 -grpcio-status==1.58.0 +grpcio==1.59.0 +grpcio-status==1.59.0 guppy3==3.1.3 -hdfs==2.7.2 +hdfs==2.7.3 httplib2==0.22.0 -hypothesis==6.87.0 +hypothesis==6.88.1 idna==3.4 +importlib-metadata==6.8.0 iniconfig==2.0.0 joblib==1.3.2 Js2Py==0.74 @@ -90,15 +92,15 @@ nose==1.3.7 numpy==1.24.4 oauth2client==4.1.3 objsize==0.6.1 -orjson==3.9.7 +orjson==3.9.9 overrides==6.5.0 -packaging==23.1 +packaging==23.2 pandas==1.5.3 parameterized==0.9.0 pluggy==1.3.0 proto-plus==1.22.3 -protobuf==4.24.3 -psycopg2-binary==2.9.7 +protobuf==4.24.4 +psycopg2-binary==2.9.9 pyarrow==11.0.0 pyasn1==0.5.0 pyasn1-modules==0.3.0 @@ -109,20 +111,23 @@ pyjsparser==2.7.1 pymongo==4.5.0 PyMySQL==1.1.0 pyparsing==3.1.1 +pyproject_hooks==1.0.0 pytest==7.4.2 -pytest-timeout==2.1.0 +pytest-timeout==2.2.0 pytest-xdist==3.3.1 python-dateutil==2.8.2 python-snappy==0.6.1 pytz==2023.3.post1 PyYAML==6.0.1 -regex==2023.8.8 +referencing==0.30.2 +regex==2023.10.3 requests==2.31.0 requests-mock==1.11.0 +rpds-py==0.10.6 rsa==4.9 scikit-learn==1.3.1 -scipy==1.11.2 -Shapely==1.8.5.post1 +scipy==1.11.3 +shapely==2.0.2 six==1.16.0 sortedcontainers==2.4.0 soupsieve==2.5 @@ -134,9 +139,10 @@ threadpoolctl==3.2.0 tomli==2.0.1 tqdm==4.66.1 typing_extensions==4.8.0 -tzlocal==5.0.1 +tzlocal==5.1 uritemplate==4.1.1 -urllib3==1.26.16 -websocket-client==1.6.3 +urllib3==2.0.7 +websocket-client==1.6.4 wrapt==1.15.0 +zipp==3.17.0 zstandard==0.21.0 From b97c7dee5aad99ad0fcf0ba53369811026f02012 Mon Sep 17 00:00:00 2001 From: Robert Bradshaw Date: Fri, 20 Oct 2023 09:33:38 -0700 Subject: [PATCH 4/4] yapf --- sdks/python/apache_beam/yaml/json_utils.py | 5 +++-- sdks/python/apache_beam/yaml/yaml_io_test.py | 14 +++++++------- 2 files changed, 10 insertions(+), 9 deletions(-) diff --git a/sdks/python/apache_beam/yaml/json_utils.py b/sdks/python/apache_beam/yaml/json_utils.py index 5b0f5d74fa77..e11d18720617 100644 --- a/sdks/python/apache_beam/yaml/json_utils.py +++ b/sdks/python/apache_beam/yaml/json_utils.py @@ -148,8 +148,9 @@ def json_parser( else: cls = jsonschema.validators.validator_for(json_schema) cls.check_schema(json_schema) - validate_fn = _PicklableFromConstructor(lambda: jsonschema.validators.validator_for(json_schema)( - json_schema).validate) + validate_fn = _PicklableFromConstructor( + lambda: jsonschema.validators.validator_for(json_schema) + (json_schema).validate) to_row = json_to_row( schema_pb2.FieldType(row_type=schema_pb2.RowType(schema=beam_schema))) diff --git a/sdks/python/apache_beam/yaml/yaml_io_test.py b/sdks/python/apache_beam/yaml/yaml_io_test.py index 10aec8e8256d..d463430ce2e0 100644 --- a/sdks/python/apache_beam/yaml/yaml_io_test.py +++ b/sdks/python/apache_beam/yaml/yaml_io_test.py @@ -310,13 +310,13 @@ def test_read_json_without_error_handling(self): def test_read_json_with_bad_schema(self): with beam.Pipeline(options=beam.options.pipeline_options.PipelineOptions( pickle_library='cloudpickle')) as p: - with mock.patch( - 'apache_beam.io.ReadFromPubSub', - FakeReadFromPubSub(topic='my_topic', - messages=[PubsubMessage('{"some_int": 123}', - attributes={}), - PubsubMessage('{"some_int": "NOT"}', - attributes={})])): + with mock.patch('apache_beam.io.ReadFromPubSub', + FakeReadFromPubSub( + topic='my_topic', + messages=[PubsubMessage('{"some_int": 123}', + attributes={}), + PubsubMessage('{"some_int": "NOT"}', + attributes={})])): result = p | YamlTransform( ''' type: ReadFromPubSub