diff --git a/.github/trigger_files/beam_PostCommit_Python_Xlang_Gcp_Dataflow.json b/.github/trigger_files/beam_PostCommit_Java_DataflowV2.json similarity index 100% rename from .github/trigger_files/beam_PostCommit_Python_Xlang_Gcp_Dataflow.json rename to .github/trigger_files/beam_PostCommit_Java_DataflowV2.json diff --git a/.github/workflows/beam_PostCommit_Java_DataflowV2.yml b/.github/workflows/beam_PostCommit_Java_DataflowV2.yml index 58ecc37361d4..3abbe9cdc6c6 100644 --- a/.github/workflows/beam_PostCommit_Java_DataflowV2.yml +++ b/.github/workflows/beam_PostCommit_Java_DataflowV2.yml @@ -19,7 +19,7 @@ name: PostCommit Java Dataflow V2 on: schedule: - - cron: '30 3/6 * * *' + - cron: '30 3/8 * * *' pull_request_target: paths: ['release/trigger_all_tests.json', '.github/trigger_files/beam_PostCommit_Java_DataflowV2.json'] workflow_dispatch: @@ -54,7 +54,7 @@ jobs: beam_PostCommit_Java_DataflowV2: name: ${{ matrix.job_name }} (${{ matrix.job_phrase }}) runs-on: [self-hosted, ubuntu-20.04, main] - timeout-minutes: 240 + timeout-minutes: 360 strategy: matrix: job_name: [beam_PostCommit_Java_DataflowV2] diff --git a/.github/workflows/beam_PostCommit_TransformService_Direct.yml b/.github/workflows/beam_PostCommit_TransformService_Direct.yml index 0cb4352c2fbb..1497681ae991 100644 --- a/.github/workflows/beam_PostCommit_TransformService_Direct.yml +++ b/.github/workflows/beam_PostCommit_TransformService_Direct.yml @@ -91,7 +91,7 @@ jobs: uses: actions/upload-artifact@v4 if: failure() with: - name: Python Test Results + name: Python ${{ matrix.python_version }} Test Results path: '**/pytest*.xml' - name: Publish Python Test Results uses: EnricoMi/publish-unit-test-result-action@v2 diff --git a/.test-infra/metrics/sync/github/sync_workflows.py b/.test-infra/metrics/sync/github/sync_workflows.py index a2c062b175ea..d2400dab4080 100644 --- a/.test-infra/metrics/sync/github/sync_workflows.py +++ b/.test-infra/metrics/sync/github/sync_workflows.py @@ -328,7 +328,6 @@ 'Beam Metrics Report', 'Build and Version Runner Docker Image', 'PreCommit GHA', - 'pr-bot-prs-needing-attention', 'PreCommit RAT', 'Assign or close an issue', 'PostCommit Website Test', @@ -339,8 +338,10 @@ 'PreCommit Whitespace', 'Publish Beam SDK Snapshots', 'Cancel Stale Dataflow Jobs', + 'pr-bot-new-prs', 'pr-bot-pr-updates', - 'pr-bot-new-prs' + 'pr-bot-prs-needing-attention', + 'pr-bot-update-reviewers' ] MISC_TESTS = [ @@ -355,7 +356,6 @@ 'Cancel', 'PostCommit PortableJar Spark', 'PreCommit Integration and Load Test Framework', - 'pr-bot-update-reviewers', 'PostCommit TransformService Direct', 'Cut Release Branch', 'Generate issue report', @@ -404,7 +404,7 @@ def get_dashboard_category(workflow_name): return 'go' if workflow_name in MISC_TESTS: return 'misc' - + print(f'No category found for workflow: {workflow_name}') print('Falling back to rules based assignment') @@ -471,7 +471,7 @@ def get_token(): git_integration = GithubIntegration(GH_APP_ID, GH_PEM_KEY) token = git_integration.get_access_token(GH_APP_INSTALLATION_ID).token return f'Bearer {token}' - + @backoff.on_exception(backoff.constant, aiohttp.ClientResponseError, max_tries=5) async def fetch(url, semaphore, params=None, headers=None, request_id=None): async with semaphore: @@ -574,7 +574,7 @@ def append_workflow_runs(workflow, runs): else: workflow_ids_to_fetch_extra_runs.pop(workflow_id, None) print(f"Successfully fetched details for: {workflow.filename}") - + page = math.ceil( int(GH_NUMBER_OF_WORKFLOW_RUNS_TO_FETCH) / number_of_entries_per_page ) + 1 diff 
--git a/.test-infra/tools/stale_bq_datasets_cleaner.sh b/.test-infra/tools/stale_bq_datasets_cleaner.sh index 47e8ea95498e..c4afabe11e9a 100755 --- a/.test-infra/tools/stale_bq_datasets_cleaner.sh +++ b/.test-infra/tools/stale_bq_datasets_cleaner.sh @@ -18,7 +18,7 @@ # Deletes stale and old BQ datasets that are left after tests. # -set -euo pipefail +set -exuo pipefail PROJECT=apache-beam-testing MAX_RESULT=1500 @@ -36,19 +36,27 @@ for dataset in ${BQ_DATASETS[@]}; do if [[ $dataset =~ $template ]]; then # The BQ API reports LAST MODIFIED TIME in miliseconds, while unix works in seconds since epoch # thus why we need to convert to seconds. - [[ `bq --format=json --project_id=$PROJECT show $dataset` =~ \"lastModifiedTime\":\"([0-9]+)\" ]] - LAST_MODIFIED_MS=${BASH_REMATCH[1]} - LAST_MODIFIED=$(($LAST_MODIFIED_MS / 1000)) - if [[ $GRACE_PERIOD -gt $LAST_MODIFIED ]]; then - if bq --project_id=$PROJECT rm -r -f $dataset; then - if [[ $OSTYPE == "linux-gnu"* ]]; then - # date command usage depending on OS - echo "Deleted $dataset (modified `date -d @$LAST_MODIFIED`)" - elif [[ $OSTYPE == "darwin"* ]]; then - echo "Deleted $dataset (modified `date -r @$LAST_MODIFIED`)" + + failed=0 + ds=`bq --format=json --project_id=$PROJECT show $dataset` || failed=1 + if [[ $failed -eq 1 ]]; then + echo "Could not find dataset $dataset - it may have already been deleted, skipping" + else + [[ $ds =~ \"lastModifiedTime\":\"([0-9]+)\" ]] + LAST_MODIFIED_MS=${BASH_REMATCH[1]} + LAST_MODIFIED=$(($LAST_MODIFIED_MS / 1000)) + if [[ $GRACE_PERIOD -gt $LAST_MODIFIED ]]; then + if bq --project_id=$PROJECT rm -r -f $dataset; then + if [[ $OSTYPE == "linux-gnu"* ]]; then + # date command usage depending on OS + echo "Deleted $dataset (modified `date -d @$LAST_MODIFIED`)" + elif [[ $OSTYPE == "darwin"* ]]; then + echo "Deleted $dataset (modified `date -r @$LAST_MODIFIED`)" + fi + else + echo "Tried and failed to delete $dataset" + failed_calls+=1 fi - else - failed_calls+=1 fi fi break diff --git a/.test-infra/tools/stale_dataflow_prebuilt_image_cleaner.sh b/.test-infra/tools/stale_dataflow_prebuilt_image_cleaner.sh index 804f6e2f39d4..126249324fed 100755 --- a/.test-infra/tools/stale_dataflow_prebuilt_image_cleaner.sh +++ b/.test-infra/tools/stale_dataflow_prebuilt_image_cleaner.sh @@ -27,14 +27,14 @@ PRIVATE_REPOSITORIES=(java-postcommit-it python-postcommit-it jenkins github-act # set as the same as 6-week release period if [[ $OSTYPE == "linux-gnu"* ]]; then # date command usage depending on OS - DELETE_BEFORE_DAY=$(date --iso-8601=s -d '6 weeks ago') + DELETE_BEFORE_PUBLIC=$(date --iso-8601=s -d '6 weeks ago') + DELETE_BEFORE_PRIVATE=$(date --iso-8601=s -d '3 days ago') elif [[ $OSTYPE == "darwin"* ]]; then - DELETE_BEFORE_DAY=$(date -j -v-6w '+%Y-%m-%dT%H:%M:%S') + DELETE_BEFORE_PUBLIC=$(date -j -v-6w '+%Y-%m-%dT%H:%M:%S') + DELETE_BEFORE_PRIVATE=$(date -j -v-3d '+%Y-%m-%dT%H:%M:%S') fi -REPOSITORIES=("${PUBLIC_REPOSITORIES[@]/#/gcr.io/apache-beam-testing/}" "${PRIVATE_REPOSITORIES[@]/#/us.gcr.io/apache-beam-testing/}") - -echo $REPOSITORIES +REPOSITORIES=("${PRIVATE_REPOSITORIES[@]/#/us.gcr.io/apache-beam-testing/}" "${PUBLIC_REPOSITORIES[@]/#/gcr.io/apache-beam-testing/}") # walk repos recursively IMAGE_NAMES="" @@ -54,8 +54,8 @@ while [ -n "$REPOSITORIES" ]; do REPOSITORIES=("${PENDING_REPOSITORIES[@]}") done -STALE_IMAGES="" -FAILED_INSPECT="" +HAS_STALE_IMAGES="" +FAILED_IMAGES="" for image_name in ${IMAGE_NAMES[@]}; do echo IMAGES FOR image ${image_name} @@ -64,22 +64,31 @@ for image_name in 
${IMAGE_NAMES[@]}; do LATEST_IN_TIME=$(gcloud container images list-tags \ ${image_name} --sort-by="~TIMESTAMP" --filter="NOT tags:latest " --format="get(digest)" --limit=1) if [ -n "$LATEST_IN_TIME" ]; then + # decide timestamp cutoff + if [[ $image_name =~ 'us.gcr.io' ]]; then + DELETE_BEFORE_DAY=$DELETE_BEFORE_PRIVATE + else + DELETE_BEFORE_DAY=$DELETE_BEFORE_PUBLIC + fi # list containers of the image name echo "Command" gcloud container images list-tags \ ${image_name} \ --sort-by=TIMESTAMP --filter="NOT tags:latest AND timestamp.datetime < $DELETE_BEFORE_DAY" \ - --format="get(digest)" - STALE_IMAGES_CURRENT=$(gcloud container images list-tags \ + --format="get(digest,timestamp.year)" + STALE_IMAGES=$(gcloud container images list-tags \ ${image_name} \ --sort-by=TIMESTAMP --filter="NOT tags:latest AND timestamp.datetime < $DELETE_BEFORE_DAY" \ - --format="get(digest)") - STALE_IMAGES+=$STALE_IMAGES_CURRENT - for current in ${STALE_IMAGES_CURRENT[@]}; do + --format="get(digest,timestamp.year)") + + STALE_IMAGES_CURRENT=($STALE_IMAGES) + for (( i_stale_images_current=0; i_stale_images_current<${#STALE_IMAGES_CURRENT[@]} ; i_stale_images_current+=2 )) ; do + current=${STALE_IMAGES_CURRENT[i_stale_images_current]} + currentyear=${STALE_IMAGES_CURRENT[i_stale_images_current+1]} # do not delete the one with latest label and the newest image without latest label # this make sure we leave at least one container under each image name, either labelled "latest" or not if [ "$LATEST_IN_TIME" != "$current" ]; then - if [[ $image_name =~ 'beamgrafana' || $image_name =~ 'beammetricssyncjenkins' || $image_name =~ 'beammetricssyncgithub' ]]; then - # Skip docker manifest inspect for known single arch images, workaround permission issue & saving API call + if [[ $currentyear > 1970 ]]; then + # Skip docker manifest inspect for those not in epoch to save API call SHOULD_DELETE=0 else # Check to see if this image is built on top of earlier images. This is the case for multiarch images, @@ -89,15 +98,15 @@ for image_name in ${IMAGE_NAMES[@]}; do MANIFEST=$(docker manifest inspect ${image_name}@"${current}" || echo "") if [ -z "$MANIFEST" ]; then # Sometimes "no such manifest" seen. Skip current if command hit error - FAILED_INSPECT+=" $current" + FAILED_IMAGES+=" $current" continue fi SHOULD_DELETE=0 DIGEST=$(echo $MANIFEST | jq -r '.manifests[0].digest') if [ "$DIGEST" != "null" ]; then SHOULD_DELETE=1 - for i in ${STALE_IMAGES_CURRENT[@]}; do - if [ "$i" = "$DIGEST" ]; then + for (( j_stale_images_current=0; j_stale_images_current<${#STALE_IMAGES_CURRENT[@]} ; j_stale_images_current+=2 )) ; do + if [ "${STALE_IMAGES_CURRENT[j_stale_images_current]}" = "$DIGEST" ]; then SHOULD_DELETE=0 break fi @@ -105,8 +114,8 @@ for image_name in ${IMAGE_NAMES[@]}; do fi fi - if [ $SHOULD_DELETE = 0 ] - then + if [ $SHOULD_DELETE = 0 ]; then + HAS_STALE_IMAGES="true" echo "Deleting image. Command: gcloud container images delete ${image_name}@"${current}" --force-delete-tags -q" gcloud container images delete ${image_name}@"${current}" --force-delete-tags -q || FAILED_TO_DELETE+="${current} " fi @@ -120,18 +129,18 @@ for image_name in ${IMAGE_NAMES[@]}; do echo "Failed to delete the following images: ${FAILED_TO_DELETE}. Retrying each of them." for current in $RETRY_DELETE; do echo "Trying again to delete image ${image_name}@"${current}". 
Command: gcloud container images delete ${image_name}@"${current}" --force-delete-tags -q" - gcloud container images delete ${image_name}@"${current}" --force-delete-tags -q + gcloud container images delete ${image_name}@"${current}" --force-delete-tags -q || FAILED_IMAGES+=" ${image_name}@${current}" done fi done -if [[ ${STALE_IMAGES} ]]; then +if [[ -n "$HAS_STALE_IMAGES" ]]; then echo "Deleted multiple images" else echo "No stale prebuilt container images found." fi -if [ -n "$FAILED_INSPECT" ]; then - echo "Failed delete images $FAILED_INSPECT" +if [ -n "$FAILED_IMAGES" ]; then + echo "Failed delete images $FAILED_IMAGES" exit 1 fi \ No newline at end of file diff --git a/CHANGES.md b/CHANGES.md index 2d9c249bf6b3..c63464a6e01e 100644 --- a/CHANGES.md +++ b/CHANGES.md @@ -65,7 +65,8 @@ ## New Features / Improvements -* X feature added (Java/Python) ([#X](https://github.com/apache/beam/issues/X)). +* [Enrichment Transform](https://s.apache.org/enrichment-transform) along with GCP BigTable handler added to Python SDK ([#30001](https://github.com/apache/beam/pull/30001)). +* Allow writing clustered and not time partitioned BigQuery tables (Java) ([#30094](https://github.com/apache/beam/pull/30094)). ## Breaking Changes diff --git a/build.gradle.kts b/build.gradle.kts index d4bd0fca4fc1..7e4c6cdc71ab 100644 --- a/build.gradle.kts +++ b/build.gradle.kts @@ -293,7 +293,6 @@ tasks.register("javaPreCommit") { dependsOn(":sdks:java:extensions:sorter:build") dependsOn(":sdks:java:extensions:timeseries:build") dependsOn(":sdks:java:extensions:zetasketch:build") - dependsOn(":sdks:java:fn-execution:build") dependsOn(":sdks:java:harness:build") dependsOn(":sdks:java:harness:jmh:build") dependsOn(":sdks:java:io:bigquery-io-perf-tests:build") @@ -313,6 +312,7 @@ tasks.register("javaPreCommit") { dependsOn(":sdks:java:testing:tpcds:build") dependsOn(":sdks:java:testing:watermarks:build") dependsOn(":sdks:java:transform-service:build") + dependsOn(":sdks:java:transform-service:app:build") dependsOn(":sdks:java:transform-service:launcher:build") } diff --git a/buildSrc/src/main/groovy/org/apache/beam/gradle/BeamModulePlugin.groovy b/buildSrc/src/main/groovy/org/apache/beam/gradle/BeamModulePlugin.groovy index f3d79c528f8c..10a7ba7f1c84 100644 --- a/buildSrc/src/main/groovy/org/apache/beam/gradle/BeamModulePlugin.groovy +++ b/buildSrc/src/main/groovy/org/apache/beam/gradle/BeamModulePlugin.groovy @@ -2860,7 +2860,7 @@ class BeamModulePlugin implements Plugin { project.evaluationDependsOn(":sdks:python") project.evaluationDependsOn(":runners:core-construction-java") project.evaluationDependsOn(":sdks:java:extensions:python") - project.evaluationDependsOn(":sdks:java:transform-service:launcher") + project.evaluationDependsOn(":sdks:java:transform-service:app") def usesDataflowRunner = config.pythonPipelineOptions.contains("--runner=TestDataflowRunner") || config.pythonPipelineOptions.contains("--runner=DataflowRunner") @@ -2868,7 +2868,7 @@ class BeamModulePlugin implements Plugin { def envDir = project.project(":sdks:python").envdir def pythonDir = project.project(":sdks:python").projectDir def externalPort = getRandomPort() - def launcherJar = project.project(':sdks:java:transform-service:launcher').shadowJar.archivePath + def launcherJar = project.project(':sdks:java:transform-service:app').shadowJar.archivePath def groupId = project.name + randomUUID().toString() def transformServiceOpts = [ "transform_service_launcher_jar": launcherJar, @@ -2895,7 +2895,7 @@ class BeamModulePlugin implements Plugin { 
dependsOn ':sdks:python:expansion-service-container:docker' dependsOn ':sdks:java:expansion-service:container:docker' dependsOn ":sdks:python:installGcpTest" - dependsOn project.project(':sdks:java:transform-service:launcher').shadowJar.getPath() + dependsOn project.project(':sdks:java:transform-service:app').shadowJar.getPath() if (usesDataflowRunner) { dependsOn ":sdks:python:test-suites:dataflow:py${project.ext.pythonVersion.replace('.', '')}:initializeForDataflowJob" diff --git a/buildSrc/src/main/groovy/org/apache/beam/gradle/GrpcVendoring_1_60_1.groovy b/buildSrc/src/main/groovy/org/apache/beam/gradle/GrpcVendoring_1_60_1.groovy index f12f3bac35c8..b2c7053dfb60 100644 --- a/buildSrc/src/main/groovy/org/apache/beam/gradle/GrpcVendoring_1_60_1.groovy +++ b/buildSrc/src/main/groovy/org/apache/beam/gradle/GrpcVendoring_1_60_1.groovy @@ -51,14 +51,16 @@ class GrpcVendoring_1_60_1 { "com.google.protobuf:protobuf-java:$protobuf_version", "com.google.protobuf:protobuf-java-util:$protobuf_version", "com.google.code.gson:gson:$gson_version", + "io.grpc:grpc-alts:$grpc_version", "io.grpc:grpc-auth:$grpc_version", - "io.grpc:grpc-core:$grpc_version", "io.grpc:grpc-context:$grpc_version", + "io.grpc:grpc-core:$grpc_version", "io.grpc:grpc-netty-shaded:$grpc_version", "io.grpc:grpc-protobuf:$grpc_version", + "io.grpc:grpc-services:$grpc_version", "io.grpc:grpc-stub:$grpc_version", - "io.grpc:grpc-alts:$grpc_version", "io.grpc:grpc-testing:$grpc_version", + "io.grpc:grpc-util:$grpc_version", "com.google.auth:google-auth-library-credentials:$google_auth_version", "com.google.api.grpc:proto-google-common-protos:$proto_google_common_protos_version", "io.opencensus:opencensus-api:$opencensus_version", diff --git a/contributor-docs/release-guide.md b/contributor-docs/release-guide.md index b3b3213d9c97..559d3c0f9f0f 100644 --- a/contributor-docs/release-guide.md +++ b/contributor-docs/release-guide.md @@ -564,11 +564,11 @@ The following should be confirmed: At [https://hub.docker.com/u/apache](https://hub.docker.com/search?q=apache%2Fbeam&type=image), visit each repository and navigate to "tags" tab. Verify images are pushed -with tags: `${RELEASE_VERSION}rc{RC_NUM}` +with tags: `${RELEASE_VERSION}rc${RC_NUM}` Verify that third party licenses are included in Docker. You can do this with a simple script: - RC_TAG=${RELEASE_VERSION}rc{RC_NUM} + RC_TAG=${RELEASE_VERSION}rc${RC_NUM} for pyver in 3.8 3.9 3.10 3.11; do docker run --rm --entrypoint sh \ apache/beam_python${pyver}_sdk:${RC_TAG} \ @@ -577,7 +577,7 @@ Verify that third party licenses are included in Docker. You can do this with a for javaver in 8 11 17; do docker run --rm --entrypoint sh \ - apache/beam_java${pyver}_sdk:${RC_TAG} \ + apache/beam_java${javaver}_sdk:${RC_TAG} \ -c 'ls -al /opt/apache/beam/third_party_licenses/ | wc -l' done diff --git a/learning/prompts/README.md b/learning/prompts/README.md new file mode 100644 index 000000000000..ea5d7d8bd79d --- /dev/null +++ b/learning/prompts/README.md @@ -0,0 +1,76 @@ + +This folder holds golden prompt/response pairs for Google Duet AI training. + +A golden prompt/response pair contains two parts: +1. An example prompt/question to ask an LLM +2. 
An ideal answer we would expect the LLM to generate + +Each prompt/response pair is a markdown file with the following structure: +``` +Prompt: + + +Response: + +``` +This folder includes the following types of golden prompts: +- Documentation lookup prompts +- Code generation prompts +- Code explanation prompts + +## Documentation lookup prompts +Features of a good response: +- Uses official product names in the response (“Speech to text” → “Speech-to-Text”). +- Answers the question (correctly) with hyperlinks to the documentation. +- Includes a link to the corresponding source code. +- Includes a link to samples, if available. + + +## Code generation prompts +Features of a good response: +- Starts with a brief introduction that explains the code sample. +- Includes information about how to find the reference documentation. +- Includes a link to the list of code samples. +- Provides well documented code. Consider including an example of what the return result looks like. +- Follows up with the user to ensure they don’t continue needlessly with false responses. + + +## Code explanation prompts +Features of a good response: +- Starts with a short overall description that tries to answer the question in the prompt. +- Grounds the algorithm in any well-known context, if appropriate. For example, this is an implementation of X, a well-known algorithm to do Y. +- Discusses the variables in the snippet, and what their purpose is relative to the runtime. +- Discusses runtime and memory storage complexity. +- Notes any interesting features of the code, or opportunities for improvement (optimizations, refactoring, syntax best practices, etc.) + +Folder structure: +``` +learning/prompts/ +├── code-explanation +│   ├── 01_io_kafka.md +│   └── ... +├── code-generation +│   ├── 01_io_kafka.md +│   └── ... +├── documentation-lookup +│   ├── 01_basic_learning_apache_beam.md +│   └── ... +└── README.md +``` \ No newline at end of file diff --git a/learning/prompts/code-explanation/01_io_kafka.md b/learning/prompts/code-explanation/01_io_kafka.md new file mode 100644 index 000000000000..c6b36b400b41 --- /dev/null +++ b/learning/prompts/code-explanation/01_io_kafka.md @@ -0,0 +1,65 @@ +Prompt: +What does this code do? +```python +class KafkaOptions(PipelineOptions): + @classmethod + def _add_argparse_args(cls, parser): + parser.add_argument( + '--bootstrap_servers', + default="localhost:9092", + help='Apache Kafka bootstrap servers' + ) + parser.add_argument( + '--topic', + default="your-topic", + help='Apache Kafka topic' + ) + +options = KafkaOptions() + +with beam.Pipeline(options=options) as p: + output = (p | "Read from Kafka" >> ReadFromKafka( + consumer_config={'bootstrap.servers': options.bootstrap_servers}, + topics=[options.topic], + with_metadata=False + ) + | "Log Data" >> Map(logging.info)) + +``` +Response: +This code uses the Apache Beam [KafkaIO Python connector](https://beam.apache.org/releases/pydoc/current/apache_beam.io.kafka.html) to read data from an [Apache Kafka](https://kafka.apache.org/) topic `topic` on Kafka bootstrap servers `bootstrap_servers`. + +The `ReadFromKafka` transform reads from a Kafka topic `topic` on `bootstrap_servers`. This is a cross-language transform that is implemented in Java and is available in Python, Go, and TypeScript via X-language. The Python SDK connects to a Java expansion service to expand these transforms during pipeline execution.
For more information about cross-language transforms, see Apache Beam [Multi-language pipelines](https://beam.apache.org/documentation/programming-guide/#multi-language-pipelines). + +```python +with beam.Pipeline(options=options) as p: + output = (p | "Read from Kafka" >> ReadFromKafka( + consumer_config={'bootstrap.servers': options.bootstrap_servers}, + topics=[options.topic], + with_metadata=False + ) + | "Log Data" >> Map(logging.info)) +``` +The required arguments for the `ReadFromKafka` transform are `consumer_config` and `topics`. You can use the `consumer_config` argument to specify the Kafka bootstrap servers, the `topics` argument to specify the Kafka topic to read data from, and the `with_metadata` argument to specify whether to include Kafka metadata in the output `PCollection`. The default value for the `with_metadata` argument is `False`. + +The `KafkaOptions` class is used to parse pipeline options for the Kafka read transform. The `bootstrap_servers` and `topic` arguments are provided as command-line arguments. This code uses [pipeline options](https://beam.apache.org/documentation/patterns/pipeline-options/) to parse command-line arguments. + +```python +class KafkaOptions(PipelineOptions): + @classmethod + def _add_argparse_args(cls, parser): + parser.add_argument( + '--bootstrap_servers', + default="localhost:9092", + help='Apache Kafka bootstrap servers' + ) + parser.add_argument( + '--topic', + default="your-topic", + help='Apache Kafka topic' + ) + +options = KafkaOptions() +``` + +For more information about how to use the KafkaIO connector with the Python SDK for Apache Beam, see [KafkaIO module](https://beam.apache.org/releases/pydoc/current/apache_beam.io.kafka.html). \ No newline at end of file diff --git a/learning/prompts/code-explaination/02_io_pubsub.md b/learning/prompts/code-explanation/02_io_pubsub.md similarity index 100% rename from learning/prompts/code-explaination/02_io_pubsub.md rename to learning/prompts/code-explanation/02_io_pubsub.md diff --git a/learning/prompts/code-explanation/03_io_bigquery.md b/learning/prompts/code-explanation/03_io_bigquery.md new file mode 100644 index 000000000000..2520bcc7b0d0 --- /dev/null +++ b/learning/prompts/code-explanation/03_io_bigquery.md @@ -0,0 +1,39 @@ +Prompt: +What does this code do? +```python +class BigQueryReadTableOptions(PipelineOptions): + @classmethod + def _add_argparse_args(cls, parser): + parser.add_argument( + "--table", + default="bigquery-public-data:samples.github_timeline", + help="BigQuery table to read data from") + +options = BigQueryReadTableOptions() +with beam.Pipeline(options=options) as p: + output = (p | "Read table" >> ReadFromBigQuery(table=options.table) + | "Log data" >> Map(logging.info) + ) +``` +Response: +This code uses the [Google BigQuery I/O connector](https://beam.apache.org/documentation/io/built-in/google-bigquery/) to read data from a [BigQuery](https://cloud.google.com/bigquery/docs) table `table`. + +The BigQueryIO connector supports reading from BigQuery tables, executing queries, and reading from the BigQuery Storage API (beta). The BigQueryIO connector supports writing to BigQuery tables in batch and streaming modes. + +```python +output = (p | "Read table" >> ReadFromBigQuery(table=options.table)) +``` +The `ReadFromBigQuery` transform uses a BigQuery export job to save a table snapshot to Google Cloud Storage and then reads from each produced file. The default file format is Avro. + +Another option is to use a SQL query to read data from BigQuery.
+ +```python +output = (p | "Read table" >> ReadFromBigQuery(query="SELECT * FROM table")) +``` +For more information about how to use the BigQueryIO connector with the Python SDK for Apache Beam, see [BigQueryIO module](https://beam.apache.org/releases/pydoc/current/apache_beam.io.gcp.bigquery.html). + +This code uses [pipeline options](https://beam.apache.org/documentation/patterns/pipeline-options/) for the required `table` argument. The `table` argument is used to specify the BigQuery table to read data from. + +For performance metrics of the BigQueryIO connector, see [BigQueryIO Performance](https://beam.apache.org/performance/bigquery/). + + diff --git a/learning/prompts/code-explanation/04_io_bigtable.md b/learning/prompts/code-explanation/04_io_bigtable.md new file mode 100644 index 000000000000..44673dcbf932 --- /dev/null +++ b/learning/prompts/code-explanation/04_io_bigtable.md @@ -0,0 +1,44 @@ +Prompt: +What does this code do? +```python +class BigtableOptions(PipelineOptions): + @classmethod + def _add_argparse_args(cls, parser): + parser.add_argument( + '--project_id', + required=True, + help='Project ID' + ) + parser.add_argument( + '--instance_id', + default="beam-test", + help='Cloud Bigtable instance ID' + ) + parser.add_argument( + '--table_id', + default="your-test-table", + help='Cloud Bigtable table ID' + ) + + options = BigtableOptions() + +with beam.Pipeline(options=options) as p: + output = (p | "Read from Bigtable" >> ReadFromBigtable( + project_id=options.project_id, + instance_id=options.instance_id, + table_id=options.table_id + ) + | "Extract cells" >> beam.Map(lambda row: row._cells) + | "Log Data" >> Map(logging.info)) +``` + +Response: +This code uses the Apache Beam [BigtableIO connector](https://beam.apache.org/releases/pydoc/current/apache_beam.io.gcp.bigtableio.html) to read data from a [Bigtable](https://cloud.google.com/bigtable/docs) table `table_id` in a Bigtable instance `instance_id`. The `table_id` and `instance_id` are provided as command-line arguments. + +This code uses [pipeline options](https://beam.apache.org/documentation/patterns/pipeline-options/) to parse command-line arguments. + +The `ReadFromBigtable` transform returns a `PCollection` of `PartialRowData` objects, each representing a Bigtable row. For more information about this row object, see [PartialRowData (row_key)](https://cloud.google.com/python/docs/reference/bigtable/latest/row#class-googlecloudbigtablerowpartialrowdatarowkey). + +The data from a Bigtable row is extracted and logged to the console. + +For more information about how to use the BigtableIO connector with the Python SDK for Apache Beam, see [BigtableIO module](https://beam.apache.org/releases/pydoc/current/apache_beam.io.gcp.bigtableio.html). \ No newline at end of file diff --git a/learning/prompts/code-explanation/05_io_spanner.md b/learning/prompts/code-explanation/05_io_spanner.md new file mode 100644 index 000000000000..6a17f6ce2c46 --- /dev/null +++ b/learning/prompts/code-explanation/05_io_spanner.md @@ -0,0 +1,96 @@ +Prompt: +What does this code do? 
+```python +class ExampleRow(NamedTuple): + id: int + name: str + +class SpannerOptions(PipelineOptions): + @classmethod + def _add_argparse_args(cls, parser): + parser.add_argument( + '--project_id', + default='your-project-id', + help='Google Cloud project ID' + ) + parser.add_argument( + '--instance_id', + default='your-instance-id', + help='Google Cloud Spanner instance ID' + ) + parser.add_argument( + '--database_id', + default='your-database-id', + help='Google Cloud Spanner database ID' + ) + +options = SpannerOptions() +coders.registry.register_coder(ExampleRow, coders.RowCoder) + +with beam.Pipeline(options=options) as p: + + output = (p | "Read from table" >> ReadFromSpanner( + project_id=options.project_id, + instance_id=options.instance_id, + database_id=options.database_id, + row_type=ExampleRow, + sql="SELECT * FROM example_row" + ) + | "Log Data" >> Map(logging.info)) + +``` +Response: +This code uses the Apache Beam [SpannerIO connector](https://beam.apache.org/releases/pydoc/current/apache_beam.io.gcp.spanner.html) to read data from a [Cloud Spanner](https://cloud.google.com/spanner) table `example_row` from database `your-database-id` in instance `your-instance-id`. The values `your-database-id`, `your-instance-id`, and `your-project-id` are provided as command-line arguments. + +```python +class SpannerOptions(PipelineOptions): + @classmethod + def _add_argparse_args(cls, parser): + parser.add_argument( + '--project_id', + default='your-project-id', + help='Google Cloud project ID' + ) + parser.add_argument( + '--instance_id', + default='your-instance-id', + help='Google Cloud Spanner instance ID' + ) + parser.add_argument( + '--database_id', + default='your-database-id', + help='Google Cloud Spanner database ID' + ) + +options = SpannerOptions() +``` +This code uses [pipeline options](https://beam.apache.org/documentation/patterns/pipeline-options/) to parse command-line arguments. The `SpannerOptions` class defines the command-line arguments `project_id`, `instance_id`, and `database_id`, which are used to configure the `ReadFromSpanner` transform. + +```python +class ExampleRow(NamedTuple): + id: int + name: str +``` +This code defines a `NamedTuple` object `ExampleRow` that represents a Cloud Spanner row. The `NamedTuple` object has the fields `id` and `name`, which represent a Cloud Spanner row. The `NamedTuple` object is used as a row type in the `ReadFromSpanner` transform. + +```python + coders.registry.register_coder(ExampleRow, coders.RowCoder) +``` +Registering a coder for `NamedTuple` is required to use `NamedTuple` as a row type. For more information about how to register a coder for a custom type, see [Data encoding and type safety](https://beam.apache.org/documentation/programming-guide/#data-encoding-and-type-safety). + +```python +output = (p | "Read from table" >> ReadFromSpanner( + project_id=options.project_id, + instance_id=options.instance_id, + database_id=options.database_id, + row_type=ExampleRow, + sql="SELECT * FROM example_row" + ) + | "Log Data" >> Map(logging.info)) +``` + +The `ReadFromSpanner` transform returns a `PCollection` of `NamedTuple` objects, each representing a Cloud Spanner row. For more information about this row object, see [ReadFromSpanner transform](https://beam.apache.org/releases/pydoc/current/apache_beam.io.gcp.spanner.html#apache_beam.io.gcp.spanner.ReadFromSpanner). + +The data from a Cloud Spanner row is logged to the console. 
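Supplementary note (not part of the patch above): the Spanner explanation states that `ReadFromSpanner` returns a `PCollection` of `NamedTuple` elements. A minimal sketch of how those `ExampleRow` elements could be consumed downstream, assuming the same placeholder project, instance, database, and table as the example; the name filter and formatting step are purely illustrative:

```python
import logging
from typing import NamedTuple

import apache_beam as beam
from apache_beam import coders, Filter, Map
from apache_beam.io.gcp.spanner import ReadFromSpanner


class ExampleRow(NamedTuple):
    id: int
    name: str


# Register a coder for the NamedTuple row type, as in the example above.
coders.registry.register_coder(ExampleRow, coders.RowCoder)

with beam.Pipeline() as p:
    (p
     | "Read from table" >> ReadFromSpanner(
         project_id="your-project-id",      # placeholder values, as in the example
         instance_id="your-instance-id",
         database_id="your-database-id",
         row_type=ExampleRow,
         sql="SELECT * FROM example_row")
     # Each element is an ExampleRow, so columns are plain attributes.
     | "Keep named rows" >> Filter(lambda row: row.name)
     | "Format" >> Map(lambda row: f"{row.id}: {row.name}")
     | "Log Data" >> Map(logging.info))
```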
+ + diff --git a/learning/prompts/code-explanation/06_io_tfrecord.md b/learning/prompts/code-explanation/06_io_tfrecord.md new file mode 100644 index 000000000000..f622a51a44f0 --- /dev/null +++ b/learning/prompts/code-explanation/06_io_tfrecord.md @@ -0,0 +1,63 @@ +Prompt: +What does this code do? +```python + +def map_from_bytes(element): + return pickle.loads(element) + +class TFRecordOptions(PipelineOptions): + @classmethod + def _add_argparse_args(cls, parser): + + parser.add_argument( + "--file_pattern", + help="A file glob pattern of TFRecord files" + ) + + +options = TFRecordOptions() +with beam.Pipeline(options=options) as p: + + output = ( + p + | "Read from TFRecord" >> ReadFromTFRecord( + file_pattern=options.file_pattern + ) + | "Map from bytes" >> Map(map_from_bytes) + | "Log Data" >> Map(logging.info) + ) + +``` +Response: +This code uses the Apache Beam [TFRecordIO connector](https://beam.apache.org/releases/pydoc/current/apache_beam.io.tfrecordio.html) to read data from a TensorFlow [TFRecord file](https://www.tensorflow.org/api_docs/python/tf/data/TFRecordDataset) matched with `file_pattern`. The `file_pattern` parameter is provided as a command-line argument. + +```python +class TFRecordOptions(PipelineOptions): + @classmethod + def _add_argparse_args(cls, parser): + + parser.add_argument( + "--file_pattern", + help="A file glob pattern of TFRecord files" + ) + +options = TFRecordOptions() +``` +This code uses [pipeline options](https://beam.apache.org/documentation/patterns/pipeline-options/) to parse command-line arguments. + +```python +output = ( + p + | "Read from TFRecord" >> ReadFromTFRecord( + file_pattern=options.file_pattern + ) + | "Map from bytes" >> Map(map_from_bytes) + | "Log Data" >> Map(logging.info) +) +``` + +The [ReadFromTFRecord transform](https://beam.apache.org/releases/pydoc/current/apache_beam.io.tfrecordio.html#apache_beam.io.tfrecordio.ReadFromTFRecord) reads from a `TFRecord` source. By default, TFRecordIO transforms use `coders.BytesCoder()`. The `Map` transform applies the `map_from_bytes` function to deserialize bytes to a Python object. The deserialized object is logged to the console. + + + + diff --git a/learning/prompts/code-explanation/07_io_json.md b/learning/prompts/code-explanation/07_io_json.md new file mode 100644 index 000000000000..46e3099d9f2b --- /dev/null +++ b/learning/prompts/code-explanation/07_io_json.md @@ -0,0 +1,50 @@ +Prompt: +What does this code do? +```python +class JsonOptions(PipelineOptions): + @classmethod + def _add_argparse_args(cls, parser): + parser.add_argument( + '--file_path', + default="gs://your-bucket/your-file.json", + help='Json file path' + ) + +options = JsonOptions() + +with beam.Pipeline(options=options) as p: + output = (p | "Read from Json file" >> ReadFromJson( + path=options.file_path, + lines=False + ) + | "Log Data" >> Map(logging.info)) +``` +Response: +This code reads data from a [JSON file](https://www.json.org/) using the `ReadFromJson` transform from the built-in [TextIO connector](https://beam.apache.org/releases/pydoc/current/apache_beam.io.textio.html). + +```python +class JsonOptions(PipelineOptions): + @classmethod + def _add_argparse_args(cls, parser): + parser.add_argument( + '--file_path', + default="gs://your-bucket/your-file.json", + help='Json file path' + ) + +options = JsonOptions() + +``` +The `JsonOptions` class is used to define the command-line argument `--file_path`, which specifies the path to the JSON file.
This code uses [pipeline options](https://beam.apache.org/documentation/patterns/pipeline-options/) for the required `file_path` argument. + +```python +with beam.Pipeline(options=options) as p: + output = (p | "Read from Json file" >> ReadFromJson( + path=options.file_path, + lines=False + ) + | "Log Data" >> Map(logging.info)) +``` +A Beam pipeline is created using the `JsonOptions` class and the `ReadFromJson` transform is used to read data from the JSON file. The `lines` parameter is set to `False` to read the entire file as a single JSON object. + + diff --git a/learning/prompts/code-explanation/08_io_csv.md b/learning/prompts/code-explanation/08_io_csv.md new file mode 100644 index 000000000000..22b75310409c --- /dev/null +++ b/learning/prompts/code-explanation/08_io_csv.md @@ -0,0 +1,44 @@ +Prompt: +What does this code do? +```python +class CsvOptions(PipelineOptions): + @classmethod + def _add_argparse_args(cls, parser): + parser.add_argument( + '--file_path', + default="gs://your-bucket/your-file.csv", + help='Csv file path' + ) + + +options = CsvOptions() + +with beam.Pipeline(options=options) as p: + output = (p | "Read from Csv file" >> ReadFromCsv(path=options.file_path) + | "Log Data" >> Map(logging.info)) + +``` +Response: +This code reads data from a [CSV file](https://en.wikipedia.org/wiki/Comma-separated_values) using the `ReadFromCsv` transform from the built-in [TextIO connector](https://beam.apache.org/releases/pydoc/current/apache_beam.io.textio.html). + +```python +class CsvOptions(PipelineOptions): + @classmethod + def _add_argparse_args(cls, parser): + parser.add_argument( + '--file_path', + default="gs://your-bucket/your-file.csv", + help='Csv file path' + ) + +options = CsvOptions() +``` + +The `CsvOptions` class is used to define the command-line argument `--file_path`, which specifies the path to the CSV file. This code uses [pipeline options](https://beam.apache.org/documentation/patterns/pipeline-options/) for the required `file_path` argument. + +```python +with beam.Pipeline(options=options) as p: + output = (p | "Read from Csv file" >> ReadFromCsv(path=options.file_path) + | "Log Data" >> Map(logging.info)) +``` +A Beam pipeline is created using the `CsvOptions` class and the [ReadFromCsv transform](https://beam.apache.org/releases/pydoc/current/apache_beam.io.textio.html#apache_beam.io.textio.ReadFromCsv) is used to read data from the CSV file. diff --git a/learning/prompts/code-explanation/09_io_avro.md b/learning/prompts/code-explanation/09_io_avro.md new file mode 100644 index 000000000000..9dfe43e786b7 --- /dev/null +++ b/learning/prompts/code-explanation/09_io_avro.md @@ -0,0 +1,41 @@ +Prompt: +What does this code do? +```python +class ReadAvroOptions(PipelineOptions): + +@classmethod +def _add_argparse_args(cls, parser): + parser.add_argument( + "--path", + help="GCS path to Avro file") + + options = ReadAvroOptions() + + with beam.Pipeline(options=options) as p: + + (p | "Read Avro" >> ReadFromAvro(options.path) + | Map(logging.info)) +``` +Response: +This code reads data from [Apache Avro](https://avro.apache.org/) files using the `ReadFromAvro` transform from the built-in [AvroIO connector](https://beam.apache.org/releases/pydoc/current/apache_beam.io.avroio.html). 
+ +```python +class ReadAvroOptions(PipelineOptions): + +@classmethod +def _add_argparse_args(cls, parser): + parser.add_argument( + "--path", + help="GCS path to Avro file") + + options = ReadAvroOptions() +``` +The `ReadAvroOptions` class is used to define the command-line argument `--path`, which specifies the path to the Avro file. This code uses [pipeline options](https://beam.apache.org/documentation/patterns/pipeline-options/) for the required `path` argument. + +```python + with beam.Pipeline(options=options) as p: + + (p | "Read Avro" >> ReadFromAvro(options.path) + | Map(logging.info)) +``` +A Beam pipeline is created using the `ReadAvroOptions` class and the [ReadFromAvro transform](https://beam.apache.org/releases/pydoc/current/apache_beam.io.avroio.html#apache_beam.io.avroio.ReadFromAvro) is used to read data from the Avro file. diff --git a/learning/prompts/code-explanation/10_io_parquet.md b/learning/prompts/code-explanation/10_io_parquet.md new file mode 100644 index 000000000000..5fb1c821bf71 --- /dev/null +++ b/learning/prompts/code-explanation/10_io_parquet.md @@ -0,0 +1,39 @@ +Prompt: +What does this code do? +```python +class ReadParquetOptions(PipelineOptions): + + @classmethod + def _add_argparse_args(cls, parser): + parser.add_argument( + "--path", + help="GCS path to parquet file") + + options = ReadParquetOptions() + +with beam.Pipeline(options=options) as p: + (p | "Read Parquet" >> ReadFromParquet(options.path) + | Map(logging.info)) +``` +Response: +This code reads data from [Apache Parquet](https://parquet.apache.org/) files using the `ReadFromParquet` transform from the built-in [ParquetIO connector](https://beam.apache.org/releases/pydoc/current/apache_beam.io.parquetio.html). + +```python +class ReadParquetOptions(PipelineOptions): + + @classmethod + def _add_argparse_args(cls, parser): + parser.add_argument( + "--path", + help="GCS path to parquet file") + + options = ReadParquetOptions() +``` +The `ReadParquetOptions` class is used to define the command-line argument `--path`, which specifies the path to the Parquet file. This code uses [pipeline options](https://beam.apache.org/documentation/patterns/pipeline-options/) for the required `path` argument. + +```python +with beam.Pipeline(options=options) as p: + (p | "Read Parquet" >> ReadFromParquet(options.path) + | Map(logging.info)) +``` +A Beam pipeline is created using the `ReadParquetOptions` class and the [ReadFromParquet transform](https://beam.apache.org/releases/pydoc/current/apache_beam.io.parquetio.html#apache_beam.io.parquetio.ReadFromParquet) is used to read data from the Parquet file. diff --git a/learning/prompts/code-explanation/11_io_jdbc.md b/learning/prompts/code-explanation/11_io_jdbc.md new file mode 100644 index 000000000000..72b5f02a5f89 --- /dev/null +++ b/learning/prompts/code-explanation/11_io_jdbc.md @@ -0,0 +1,105 @@ +Prompt: +What does this code do? 
+```python +class ExampleRow(NamedTuple): + id: int + name: str + +class JdbcOptions(PipelineOptions): + @classmethod + def _add_argparse_args(cls, parser): + parser.add_argument( + '--table_name', + help='Table name' + ) + parser.add_argument( + '--jdbc_url', + help='JDBC URL' + ) + parser.add_argument( + '--driver_class_name', + help='Driver class name' + ) + parser.add_argument( + '--username', + help='Username' + ) + parser.add_argument( + '--password', + help='Password' + ) + +options = JdbcOptions() +coders.registry.register_coder(ExampleRow, coders.RowCoder) + +with beam.Pipeline(options=options) as p: + + output = (p | 'Read from JDBC' >> ReadFromJdbc( + table_name=options.table_name, + driver_class_name=options.driver_class_name, + jdbc_url=options.jdbc_url, + username=options.username, + password=options.password + ) + | 'Log Data' >> Map(logging.info)) +``` +Response: +This code reads data from an [Oracle Java Database Connectivity (JDBC) enabled database](https://docs.oracle.com/javase/8/docs/technotes/guides/jdbc/) using the `ReadFromJdbc` transform from the built-in [JdbcIO connector](https://beam.apache.org/releases/pydoc/current/apache_beam.io.jdbc.html). + +```python +class ExampleRow(NamedTuple): + id: int + name: str +``` +This code defines a named tuple `ExampleRow` with two fields: `id` and `name`. + +```python +class JdbcOptions(PipelineOptions): + @classmethod + def _add_argparse_args(cls, parser): + parser.add_argument( + '--table_name', + help='Table name' + ) + parser.add_argument( + '--jdbc_url', + help='JDBC URL' + ) + parser.add_argument( + '--driver_class_name', + help='Driver class name' + ) + parser.add_argument( + '--username', + help='Username' + ) + parser.add_argument( + '--password', + help='Password' + ) + +options = JdbcOptions() +``` + +The `JdbcOptions` class is used to define the command-line arguments `--table_name`, `--jdbc_url`, `--driver_class_name`, `--username`, and `--password`, which specify the JDBC connection parameters. This code uses [pipeline options](https://beam.apache.org/documentation/patterns/pipeline-options/) to parse command-line arguments. + +```python +coders.registry.register_coder(ExampleRow, coders.RowCoder) +``` + +This code registers a coder for the `ExampleRow` class. This is required for the `ReadFromJdbc` transform to read data from the table. + +```python +with beam.Pipeline(options=options) as p: + + output = (p | 'Read from JDBC' >> ReadFromJdbc( + jdbc_url=options.jdbc_url, + username=options.username, + password=options.password, + table_name=options.table_name, + driver_class_name=options.driver_class_name + ) + | 'Log Data' >> Map(logging.info)) +``` + +A Beam pipeline is created using the `JdbcOptions` class and the [ReadFromJdbc transform](https://beam.apache.org/releases/pydoc/current/apache_beam.io.jdbc.html#apache_beam.io.jdbc.ReadFromJdbc) is used to read data from the database via JDBC. \ No newline at end of file diff --git a/learning/prompts/code-generation/01_io_kafka.md b/learning/prompts/code-generation/01_io_kafka.md new file mode 100644 index 000000000000..e169e012ef42 --- /dev/null +++ b/learning/prompts/code-generation/01_io_kafka.md @@ -0,0 +1,46 @@ +Prompt: +Write the Python code to read data from Apache Kafka using Apache Beam. + +Response: +You can read data from [Apache Kafka](https://kafka.apache.org/) using the `ReadFromKafka` transform. The following Python code reads a topic `topic` from a Kafka server `bootstrap_servers`. 
The values for `topic` and `bootstrap_servers` are provided as command-line arguments. The data is logged to the console. + +```python +import logging +import apache_beam as beam + +from apache_beam import Map +from apache_beam.io.kafka import ReadFromKafka +from apache_beam.options.pipeline_options import PipelineOptions + +class KafkaOptions(PipelineOptions): +""" +Configure pipeline options for Kafka read transform. +""" + @classmethod + def _add_argparse_args(cls, parser): + parser.add_argument( + '--bootstrap_servers', + default="localhost:9092", + help='Apache Kafka bootstrap servers' + ) + parser.add_argument( + '--topic', + default="your-topic", + help='Apache Kafka topic' + ) + +#parse pipeline options +options = KafkaOptions() + +with beam.Pipeline(options=options) as p: + #this pipeline reads from a Kafka topic and logs the data to the console + output = (p | "Read from Kafka" >> ReadFromKafka( + consumer_config={'bootstrap.servers': options.bootstrap_servers}, + topics=[options.topic], + with_metadata=False + ) + | "Log Data" >> Map(logging.info)) +``` +For more information about how to use the KafkaIO connector with the Python SDK for Apache Beam, see the [KafkaIO connector documentation](https://beam.apache.org/releases/pydoc/current/apache_beam.io.kafka.html). + +For samples that show common pipeline configurations to parse pipeline options from command-line arguments, see [Pipeline option patterns](https://beam.apache.org/documentation/patterns/pipeline-options/). \ No newline at end of file diff --git a/learning/prompts/code-generation/03_io_bigquery.md b/learning/prompts/code-generation/03_io_bigquery.md new file mode 100644 index 000000000000..b13be4fd355a --- /dev/null +++ b/learning/prompts/code-generation/03_io_bigquery.md @@ -0,0 +1,54 @@ +Prompt: +Write the Python code to read data from BigQuery using Apache Beam. + +Response: +You can read data from BigQuery using the `ReadFromBigQuery` transform. The following Python code reads data from a BigQuery table. The table name is provided as the command-line argument `table`. The data is logged to the console. + +```python +import logging + +import apache_beam as beam +from apache_beam.io import ReadFromBigQuery +from apache_beam.options.pipeline_options import PipelineOptions + +class BigQueryReadOptions(PipelineOptions): +""" +Configure pipeline options for BigQuery read transform. +""" + @classmethod + def _add_argparse_args(cls, parser): + parser.add_argument( + "--table", + required=True, + help="BigQuery table to read from.") + +def read_table(): + """Read from BigQuery table function.""" + + #parse pipeline options + #streaming=True is required for a streaming pipeline + options = BigQueryReadOptions(streaming=True) + + with beam.Pipeline(options=options) as p: + #this pipeline reads from a BigQuery table and logs the data to the console + (p | "Read BigQuery table" >> ReadFromBigQuery(table=options.table) + | "Format data" >> Map(lambda row: f"Received row:\n{row}\n") + | Map(logging.info)) + +if __name__ == '__main__': + logging.getLogger().setLevel(logging.INFO) + read_table() +``` +The `ReadFromBigQuery` transform also supports reading from a BigQuery query. The following Python code reads data from a BigQuery table using a SQL query. The query is provided as the command-line argument `query`. The data is logged to the console. 
+ +```python +with beam.Pipeline(options=options) as p: + p | "Read BigQuery table" >> ReadFromBigQuery(query='SELECT * FROM table') + | "Format data" >> Map(lambda row: f"Received row:\n{row}\n") + | Map(logging.info) + ``` + +For more information, see the [BigQueryIO connector documentation](https://beam.apache.org/releases/pydoc/current/apache_beam.io.gcp.bigquery.html). + +For samples that show common pipeline configurations, see [Pipeline option patterns](https://beam.apache.org/documentation/patterns/pipeline-options/). + diff --git a/learning/prompts/code-generation/04_io_bigtable.md b/learning/prompts/code-generation/04_io_bigtable.md new file mode 100644 index 000000000000..22ea5ebcef9e --- /dev/null +++ b/learning/prompts/code-generation/04_io_bigtable.md @@ -0,0 +1,66 @@ +Prompt: +Write the Python code to read data from Bigtable using Apache Beam. + +Response: +You can read data from [Cloud Bigtable](https://cloud.google.com/bigtable) using the `ReadFromBigtable` transform. The following Python code reads a table `table_id` from a Bigtable instance `instance_id`. The `table_id` and `instance_id` are provided as command-line arguments. The data is logged to the console. + +```python +import logging +import apache_beam as beam + +from apache_beam import Map +from apache_beam.io.gcp.bigtableio import ReadFromBigtable +from apache_beam.options.pipeline_options import PipelineOptions + +class BigtableOptions(PipelineOptions): +""" +Configure pipeline options for Bigtable read transform. +""" + @classmethod + def _add_argparse_args(cls, parser): + parser.add_argument( + '--project_id', + required=True, + help='Project ID' + ) + parser.add_argument( + '--instance_id', + default="beam-test", + help='Cloud Bigtable instance ID' + ) + parser.add_argument( + '--table_id', + default="your-test-table", + help='Cloud Bigtable table ID' + ) + + +def run(): + """ + This pipeline shows how to read from Cloud Bigtable. + """ + + #parse pipeline options + options = BigtableOptions() + + with beam.Pipeline(options=options) as p: + #this pipeline reads from a BigQuery table and logs the data to the console + output = (p | "Read from Bigtable" >> ReadFromBigtable( + project_id=options.project_id, + instance_id=options.instance_id, + table_id=options.table_id + ) + | "Extract cells" >> beam.Map(lambda row: row._cells) + | "Log Data" >> Map(logging.info)) + + +if __name__ == "__main__": + logging.getLogger().setLevel(logging.INFO) + run() +``` +The `ReadFromBigtable` transform returns a `PCollection` of `PartialRowData` objects, each representing a Bigtable row. For more information about this row object, see [PartialRowData (row_key)](https://cloud.google.com/python/docs/reference/bigtable/latest/row#class-googlecloudbigtablerowpartialrowdatarowkey) + +For more information, see the [BigTableIO connector documentation](https://beam.apache.org/releases/pydoc/current/apache_beam.io.gcp.bigtableio.html). + +For samples that show common pipeline configurations, see [Pipeline option patterns](https://beam.apache.org/documentation/patterns/pipeline-options/). + diff --git a/learning/prompts/code-generation/05_io_spanner.md b/learning/prompts/code-generation/05_io_spanner.md new file mode 100644 index 000000000000..24453ff01beb --- /dev/null +++ b/learning/prompts/code-generation/05_io_spanner.md @@ -0,0 +1,63 @@ +Prompt: +Write the Python code to read data from Cloud Spanner using Apache Beam. 
+ +Response: +You can read data from [Cloud Spanner](https://cloud.google.com/spanner) using the Apache Beam [SpannerIO connector](https://beam.apache.org/releases/pydoc/current/apache_beam.io.gcp.spanner.html). The SpannerIO connector lets you read data from and write data to Cloud Spanner. Cloud Spanner is supported in both batch pipelines and streaming pipelines. + +```python +import logging +import apache_beam as beam + +from apache_beam import coders +from apache_beam import Map +from apache_beam.io.gcp.spanner import ReadFromSpanner +from apache_beam.options.pipeline_options import PipelineOptions +from typing import NamedTuple + +class ExampleRow(NamedTuple): + id: int + name: str + +class SpannerOptions(PipelineOptions): + @classmethod + def _add_argparse_args(cls, parser): + parser.add_argument( + '--project_id', + default='your-project-id', + help='Google Cloud project ID' + ) + parser.add_argument( + '--instance_id', + default='your-instance-id', + help='Google Cloud Spanner instance ID' + ) + parser.add_argument( + '--database_id', + default='your-database-id', + help='Google Cloud Spanner database ID' + ) + +options = SpannerOptions() +coders.registry.register_coder(ExampleRow, coders.RowCoder) + +with beam.Pipeline(options=options) as p: + + output = (p | "Read from table" >> ReadFromSpanner( + project_id=options.project_id, + instance_id=options.instance_id, + database_id=options.database_id, + row_type=ExampleRow, + sql="SELECT * FROM example_row" + ) + | "Log Data" >> Map(logging.info)) +``` + +The `ReadFromSpanner` transform is a built-in Apache Beam transform that reads data from a Cloud Spanner table. The `ReadFromSpanner` transform returns a `PCollection` of `NamedTuple` objects. The `NamedTuple` object is a Python class that represents a row in a Cloud Spanner table. + +Registering a coder for `NamedTuple` is required to use `NamedTuple` as a row type: +```python + coders.registry.register_coder(ExampleRow, coders.RowCoder) +``` +For more information about how to register a coder for a custom type, see [Data encoding and type safety](https://beam.apache.org/documentation/programming-guide/#data-encoding-and-type-safety). + +The `SpannerOptions` class defines the command-line arguments `project_id`, `instance_id` and `database_id`, which are used to configure the `ReadFromSpanner` transform. These arguments are parsed from the command line using [pipeline options](https://beam.apache.org/documentation/patterns/pipeline-options/). \ No newline at end of file diff --git a/learning/prompts/code-generation/06_io_tfrecord.md b/learning/prompts/code-generation/06_io_tfrecord.md new file mode 100644 index 000000000000..7f36540b97b8 --- /dev/null +++ b/learning/prompts/code-generation/06_io_tfrecord.md @@ -0,0 +1,60 @@ +Prompt: +Write the Python code to read data from a TensorFlow TFRecord file using Apache Beam. + +Response: +You can read data from a TensorFlow [TFRecord file](https://www.tensorflow.org/api_docs/python/tf/data/TFRecordDataset) using the Apache Beam [TFRecordIO connector](https://beam.apache.org/releases/pydoc/current/apache_beam.io.tfrecordio.html). The TFRecordIO connector lets you read and write data in `TFRecord` format. 
+ +```python +import logging + +import apache_beam as beam +from apache_beam import Map +from apache_beam.io.tfrecordio import ReadFromTFRecord +from apache_beam.options.pipeline_options import PipelineOptions + + +class TFRecordOptions(PipelineOptions): + @classmethod + def _add_argparse_args(cls, parser): + parser.add_argument( + "--file_pattern", + help="A file glob pattern to read TFRecords from." + ) + +def run(): + options = TFRecordOptions() + + with beam.Pipeline(options=options) as p: + + output = ( + p + | "Read from TFRecord" >> ReadFromTFRecord( + file_pattern=options.file_pattern + ) + | "Map from bytes" >> Map(map_from_bytes) + | "Log Data" >> Map(logging.info) + ) + + +def map_from_bytes(element): + """ + Deserializes the input bytes using pickle library and + returns the reconstructed object. + """ + # third party libraries + import pickle + + return pickle.loads(element) + + +if __name__ == "__main__": + logging.getLogger().setLevel(logging.INFO) + run() + +``` + +The `TFRecordOptions` class defines the command-line argument `file_pattern`, which is used to configure the `ReadFromTFRecord` transform. This argument is parsed from the command line using [pipeline options](https://beam.apache.org/documentation/patterns/pipeline-options/). + +The `ReadFromTFRecord` transform is a built-in Apache Beam transform that reads data from a `TFRecord` file. By default, `TFRecordIO` transforms use `coders.BytesCoder()`. For more information, see [ReadFromTFRecord transform](https://beam.apache.org/releases/pydoc/current/apache_beam.io.tfrecordio.html#apache_beam.io.tfrecordio.ReadFromTFRecord). + + diff --git a/learning/prompts/code-generation/07_io_json.md b/learning/prompts/code-generation/07_io_json.md new file mode 100644 index 000000000000..6c142b49c450 --- /dev/null +++ b/learning/prompts/code-generation/07_io_json.md @@ -0,0 +1,48 @@ +Prompt: +Write the Python code to read data from JSON files using Apache Beam. + +Response: +You can read data from [JSON files](https://www.json.org/) using the Apache Beam [TextIO connector](https://beam.apache.org/releases/pydoc/current/apache_beam.io.textio.html). + +```python +import logging +import apache_beam as beam + +from apache_beam import Map +from apache_beam.io.textio import ReadFromJson +from apache_beam.options.pipeline_options import PipelineOptions + + +class JsonOptions(PipelineOptions): + @classmethod + def _add_argparse_args(cls, parser): + parser.add_argument( + '--file_path', + default="gs://your-bucket/your-file.json", + help='Json file path' + ) + + +def run(): + """ + This pipeline reads from Json file defined by the --file_path argument. + """ + + options = JsonOptions() + + with beam.Pipeline(options=options) as p: + + output = p | "Read from Json file" >> ReadFromJson( + path=options.file_path, + lines=False + ) + + +if __name__ == "__main__": + logging.getLogger().setLevel(logging.INFO) + run() +``` +The `JsonOptions` class is a custom class that extends the `PipelineOptions` object. It is used to parse pipeline options from command-line arguments. This code uses [pipeline options](https://beam.apache.org/documentation/patterns/pipeline-options/) to parse command-line arguments. + +The [ReadFromJson](https://beam.apache.org/releases/pydoc/current/apache_beam.io.textio.html#apache_beam.io.textio.ReadFromJson) transform is a built-in Apache Beam transform that reads data from a JSON file. 
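Supplementary sketch (not part of the patch above): the JSON example reads a whole-file JSON document with `lines=False`. The same transform can read newline-delimited JSON (one object per line) by setting `lines=True`; the bucket path below is a placeholder:

```python
import logging

import apache_beam as beam
from apache_beam import Map
from apache_beam.io.textio import ReadFromJson

with beam.Pipeline() as p:
    # lines=True treats the input as JSON Lines: one JSON object per line.
    (p | "Read JSON Lines" >> ReadFromJson(
            path="gs://your-bucket/your-file.jsonl",  # placeholder path
            lines=True)
       | "Log Data" >> Map(logging.info))
```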
+ diff --git a/learning/prompts/code-generation/08_io_csv.md b/learning/prompts/code-generation/08_io_csv.md new file mode 100644 index 000000000000..3888e15bb704 --- /dev/null +++ b/learning/prompts/code-generation/08_io_csv.md @@ -0,0 +1,47 @@ +Prompt: +Write the Python code to read data from CSV files using Apache Beam. + +Response: +You can read data from [CSV files](https://en.wikipedia.org/wiki/Comma-separated_values) using the Apache Beam [TextIO connector](https://beam.apache.org/releases/pydoc/current/apache_beam.io.textio.html). + +```python +import logging +import apache_beam as beam + +from apache_beam import Map +from apache_beam.io.textio import ReadFromCsv +from apache_beam.options.pipeline_options import PipelineOptions + + +class CsvOptions(PipelineOptions): + @classmethod + def _add_argparse_args(cls, parser): + parser.add_argument( + '--file_path', + help='Csv file path' + ) + + +def run(): + """ + This pipeline shows how to read from Csv file. + """ + + options = CsvOptions() + + with beam.Pipeline(options=options) as p: + + output = (p | "Read from Csv file" >> ReadFromCsv( + path=options.file_path + ) + | "Log Data" >> Map(logging.info)) + + +if __name__ == "__main__": + logging.getLogger().setLevel(logging.INFO) + run() +``` + +The `CsvOptions` class is a custom class that extends the `PipelineOptions` object. It is used to parse pipeline options from command-line arguments. This code uses [pipeline options](https://beam.apache.org/documentation/patterns/pipeline-options/) to parse command-line arguments. + +The [ReadFromCsv transform](https://beam.apache.org/releases/pydoc/current/apache_beam.io.textio.html#apache_beam.io.textio.ReadFromCsv) is a built-in Apache Beam transform that reads data from a CSV file. \ No newline at end of file diff --git a/learning/prompts/code-generation/09_io_avro.md b/learning/prompts/code-generation/09_io_avro.md new file mode 100644 index 000000000000..2ccf5427822c --- /dev/null +++ b/learning/prompts/code-generation/09_io_avro.md @@ -0,0 +1,42 @@ +Prompt: +Write the Python code to read data from Apache Avro files using Apache Beam. + +Response: +You can read data from [Apache Avro](https://avro.apache.org/) files using the Apache Beam [AvroIO connector](https://beam.apache.org/releases/pydoc/current/apache_beam.io.avroio.html). + +```python +import logging + +import apache_beam as beam +from apache_beam import Map +from apache_beam.io.avroio import ReadFromAvro +from apache_beam.options.pipeline_options import PipelineOptions + + +def run(argv=None): + + class ReadAvroOptions(PipelineOptions): + + @classmethod + def _add_argparse_args(cls, parser): + parser.add_argument( + "--path", + help="GCS path to Avro file") + + options = ReadAvroOptions() + + with beam.Pipeline(options=options) as p: + + (p | "ReadAvro" >> ReadFromAvro(options.path) + | Map(logging.info)) + + +if __name__ == "__main__": + logging.getLogger().setLevel(logging.INFO) + run() +``` +The `ReadAvroOptions` class is used to define the command-line argument `--path`, which specifies the path to the Avro file. This code uses [pipeline options](https://beam.apache.org/documentation/patterns/pipeline-options/) for the required `path` argument. + +A Beam pipeline is created using the `ReadAvroOptions` class and the [ReadFromAvro transform](https://beam.apache.org/releases/pydoc/current/apache_beam.io.avroio.html#apache_beam.io.avroio.ReadFromAvro) is used to read data from the Avro file. 
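+
+By default, `ReadFromAvro` emits each Avro record as a Python dictionary keyed by the field names in the Avro schema. For example, if the schema contains a `name` field (an illustrative assumption), you could extract it like this:
+
+```python
+with beam.Pipeline(options=options) as p:
+
+    (p | "ReadAvro" >> ReadFromAvro(options.path)
+       | "Extract name" >> Map(lambda record: record["name"])  # "name" is an assumed schema field
+       | "Log" >> Map(logging.info))
+```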
+ + diff --git a/learning/prompts/code-generation/10_io_parquet.md b/learning/prompts/code-generation/10_io_parquet.md new file mode 100644 index 000000000000..5493c5e14d55 --- /dev/null +++ b/learning/prompts/code-generation/10_io_parquet.md @@ -0,0 +1,39 @@ +Prompt: +Write the Python code to read data from Apache Parquet files using Apache Beam. + +Response: +You can read data from [Apache Parquet](https://parquet.apache.org/) files using the Apache Beam [ParquetIO connector](https://beam.apache.org/releases/pydoc/current/apache_beam.io.parquetio.html). +```python +import logging + +import apache_beam as beam +from apache_beam import Map +from apache_beam.io.parquetio import ReadFromParquet +from apache_beam.options.pipeline_options import PipelineOptions + + +def run(argv=None): + + class ReadParquetOptions(PipelineOptions): + + @classmethod + def _add_argparse_args(cls, parser): + parser.add_argument( + "--path", + help="GCS path to Parquet file") + + options = ReadParquetOptions() + + with beam.Pipeline(options=options) as p: + + (p | "Read Parquet" >> ReadFromParquet(options.path) + | Map(logging.info)) + + +if __name__ == "__main__": + logging.getLogger().setLevel(logging.INFO) + run() +``` +The `ReadParquetOptions` class is used to define the command-line argument `--path`, which specifies the path to the Parquet file. This code uses [pipeline options](https://beam.apache.org/documentation/patterns/pipeline-options/) for the required `path` argument. + +A Beam pipeline is created using the `ReadParquetOptions` class and the [ReadFromParquet transform](https://beam.apache.org/releases/pydoc/current/apache_beam.io.parquetio.html#apache_beam.io.parquetio.ReadFromParquet) is used to read data from the Parquet file. diff --git a/learning/prompts/code-generation/11_io_jdbc.md b/learning/prompts/code-generation/11_io_jdbc.md new file mode 100644 index 000000000000..9d8a98d6e649 --- /dev/null +++ b/learning/prompts/code-generation/11_io_jdbc.md @@ -0,0 +1,75 @@ +Prompt: +Write the Python code to read data from Oracle JDBC using Apache Beam. + +Response: +You can read data from an Oracle Java Database Connectivity (JDBC) enabled database using the Apache Beam [JdbcIO connector](https://beam.apache.org/releases/pydoc/current/apache_beam.io.jdbc.html). +```python +import logging +import apache_beam as beam +import typing + +from apache_beam import coders +from apache_beam import Map +from apache_beam.io.jdbc import ReadFromJdbc +from apache_beam.options.pipeline_options import PipelineOptions +from typing import NamedTuple + + +class ExampleRow(NamedTuple): + id: int + name: str + + +class JdbcOptions(PipelineOptions): + @classmethod + def _add_argparse_args(cls, parser): + parser.add_argument( + '--table_name', + help='Table name' + ) + parser.add_argument( + '--jdbc_url', + help='JDBC URL' + ) + parser.add_argument( + '--driver_class_name', + help='Driver class name' + ) + parser.add_argument( + '--username', + help='Username' + ) + parser.add_argument( + '--password', + help='Password' + ) + + +def run(): + """ + This pipeline shows how to read from JDBC. 
+ """ + + options = JdbcOptions() + coders.registry.register_coder(ExampleRow, coders.RowCoder) + + with beam.Pipeline(options=options) as p: + + output = (p | 'Read from JDBC' >> ReadFromJdbc( + table_name=options.table_name, + driver_class_name=options.driver_class_name, + jdbc_url=options.jdbc_url, + username=options.username, + password=options.password + ) + | 'Log Data' >> Map(logging.info)) + + +if __name__ == "__main__": + logging.getLogger().setLevel(logging.INFO) + run() +``` + +The `JdbcOptions` class is a custom class that extends the `PipelineOptions` object. It is used to parse pipeline options from command-line arguments. This code uses [pipeline options](https://beam.apache.org/documentation/patterns/pipeline-options/). + +A Beam pipeline is created using the `JdbcOptions` class and the [ReadFromJdbc transform](https://beam.apache.org/releases/pydoc/current/apache_beam.io.jdbc.html#apache_beam.io.jdbc.ReadFromJdbc) is used to read data from the database via JDBC. diff --git a/learning/prompts/documentation-lookup/18_io_kafka.md b/learning/prompts/documentation-lookup/18_io_kafka.md new file mode 100644 index 000000000000..28c22508109d --- /dev/null +++ b/learning/prompts/documentation-lookup/18_io_kafka.md @@ -0,0 +1,27 @@ +Prompt: +Is Apache Kafka supported in Apache Beam? + +Response: +[Apache Kafka](https://kafka.apache.org/) is a distributed streaming platform that is used for building real-time data pipelines and streaming applications. It is horizontally scalable, fault-tolerant, and fast. +Apache Beam has a built-in KafkaIO connector for Apache Kafka that lets you read data from and write data to Kafka. Apache Kafka is supported in both batch pipelines and streaming pipelines, and as a source and a sink. For more information, see the KafkaIO connector documentation: +* [Java](https://beam.apache.org/releases/javadoc/current/org/apache/beam/sdk/io/kafka/KafkaIO.html) +* [Python (using cross-language transforms)](https://beam.apache.org/releases/pydoc/current/apache_beam.io.kafka.html) +* [Go (using cross-language transforms)](https://pkg.go.dev/github.com/apache/beam/sdks/v2/go/pkg/beam/io/xlang/kafkaio) +* [Typescript (using cross-language transforms)](https://github.com/apache/beam/blob/master/sdks/typescript/src/apache_beam/io/kafka.ts) + +The [Dataflow cookbook repository](https://github.com/GoogleCloudPlatform/dataflow-cookbook) can help you to get started with Apache Kafka and Apache Beam. + +Apache Beam pipeline code for reading data from a Kafka topic might look like the following example: + +```python +with beam.Pipeline(options=options) as p: + output = (p | "Read from Kafka" >> ReadFromKafka( + consumer_config={'bootstrap.servers': 'localhost:9092'}, + topics=['my_topic'], + with_metadata=False) + | "Log Data" >> Map(logging.info)) +``` +This code reads data from a Kafka topic `my_topic` on `localhost:9092` and logs the data to the console. + +The Apache Beam repository has a [KafkaIO example](https://github.com/apache/beam/blob/master/sdks/python/apache_beam/examples/kafkataxi/kafka_taxi.py) that demonstrates how to use the KafkaIO connector. + diff --git a/learning/prompts/documentation-lookup/21_io_bigtable.md b/learning/prompts/documentation-lookup/21_io_bigtable.md new file mode 100644 index 000000000000..60f564256f4b --- /dev/null +++ b/learning/prompts/documentation-lookup/21_io_bigtable.md @@ -0,0 +1,28 @@ +Prompt: +Is Cloud BigTable supported in Apache Beam? 
+
+Response:
+[Cloud Bigtable](https://cloud.google.com/bigtable) is a [Google Cloud](https://cloud.google.com/) service that provides an enterprise-grade NoSQL database with single-digit millisecond latency, limitless scale, and 99.999% availability for large analytical and operational workloads.
+
+Apache Beam has a built-in BigtableIO connector for Cloud Bigtable that lets you read data from and write data to Bigtable. For more information, see the BigtableIO connector documentation:
+* [Java](https://beam.apache.org/releases/javadoc/current/org/apache/beam/sdk/io/gcp/bigtable/BigtableIO.html)
+* [Python](https://beam.apache.org/releases/pydoc/current/apache_beam.io.gcp.bigtableio.html)
+* [Go](https://pkg.go.dev/github.com/apache/beam/sdks/v2/go/pkg/beam/io/bigtableio)
+* [Go (using cross-language transforms)](https://pkg.go.dev/github.com/apache/beam/sdks/v2/go/pkg/beam/io/xlang/bigtableio)
+
+The [Dataflow cookbook repository](https://github.com/GoogleCloudPlatform/dataflow-cookbook) can help you to get started with Cloud Bigtable and Apache Beam.
+
+Apache Beam pipeline code for reading data from a Bigtable table might look like the following example:
+
+```python
+with beam.Pipeline(options=options) as p:
+    output = (p | "Read from Bigtable" >> ReadFromBigtable(
+                    project_id=project_id,
+                    instance_id=instance_id,
+                    table_id=table_id)
+                | "Extract cells" >> beam.Map(lambda row: row._cells)
+                | "Log Data" >> Map(logging.info))
+```
+For performance benchmarks of the Bigtable IO connector, see [Bigtable performance](https://beam.apache.org/performance/bigtable/).
\ No newline at end of file
diff --git a/learning/prompts/documentation-lookup/22_io_spanner.md b/learning/prompts/documentation-lookup/22_io_spanner.md
new file mode 100644
index 000000000000..b7a4cfb18aad
--- /dev/null
+++ b/learning/prompts/documentation-lookup/22_io_spanner.md
@@ -0,0 +1,31 @@
+Prompt:
+Is Cloud Spanner supported in Apache Beam?
+
+Response:
+[Cloud Spanner](https://cloud.google.com/spanner) is a [Google Cloud](https://cloud.google.com/) service that provides a fully managed, mission-critical relational database offering transactional consistency at global scale, schemas, SQL (ANSI 2011 with extensions), and automatic, synchronous replication for high availability.
+
+Apache Beam has a built-in SpannerIO connector for Cloud Spanner that lets you read data from and write data to Cloud Spanner. Cloud Spanner is supported in both batch pipelines and streaming pipelines, and as a source and a sink. For more information, see the SpannerIO connector documentation:
+* [Java](https://beam.apache.org/releases/javadoc/current/org/apache/beam/sdk/io/gcp/spanner/SpannerIO.html)
+* [Python (using cross-language transforms)](https://beam.apache.org/releases/pydoc/current/apache_beam.io.gcp.spanner.html)
+* [Go](https://pkg.go.dev/github.com/apache/beam/sdks/v2/go/pkg/beam/io/spannerio)
+
+The [Dataflow cookbook repository](https://github.com/GoogleCloudPlatform/dataflow-cookbook) can help you to get started with Cloud Spanner and Apache Beam.
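+
+If you use a `NamedTuple` class as the `row_type` (as in the example that follows), you typically also register a `RowCoder` for it before running the pipeline:
+
+```python
+from apache_beam import coders
+
+# ExampleRow is the NamedTuple used as row_type in the example below.
+coders.registry.register_coder(ExampleRow, coders.RowCoder)
+```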
+
+Apache Beam pipeline code for reading data from a Cloud Spanner table using a SQL query might look like the following example:
+
+```python
+class ExampleRow(NamedTuple):
+    id: int
+    name: str
+
+with beam.Pipeline(options=options) as p:
+    output = (p | "Read from table" >> ReadFromSpanner(
+                    project_id=options.project_id,
+                    instance_id=options.instance_id,
+                    database_id=options.database_id,
+                    row_type=ExampleRow,
+                    sql="SELECT * FROM example_row")
+                | "Map Data" >> Map(lambda row: f"Id = {row.id}, Name = {row.name}")
+                | "Log Data" >> Map(logging.info))
+```
diff --git a/learning/prompts/documentation-lookup/23_io_gcs.md b/learning/prompts/documentation-lookup/23_io_gcs.md
new file mode 100644
index 000000000000..e67d6fb02ccf
--- /dev/null
+++ b/learning/prompts/documentation-lookup/23_io_gcs.md
@@ -0,0 +1,23 @@
+Prompt:
+Is the Google Cloud Storage file system supported in Apache Beam?
+
+Response:
+[Google Cloud Storage](https://cloud.google.com/storage) is an object storage service for storing and accessing data on Google Cloud. Apache Beam supports both working with the Google Cloud Storage file system and reading data from and writing data to Google Cloud Storage buckets.
+
+Apache Beam has a built-in TextIO connector for reading data from and writing data to Google Cloud Storage buckets. For more information, see the TextIO connector documentation:
+* [Java connector](https://beam.apache.org/releases/javadoc/current/org/apache/beam/sdk/io/TextIO.html)
+* [Python connector](https://beam.apache.org/releases/pydoc/current/apache_beam.io.textio.html)
+* [Go connector](https://pkg.go.dev/github.com/apache/beam/sdks/v2/go/pkg/beam/io/textio)
+* [Typescript connector (using cross-language transforms)](https://github.com/apache/beam/blob/master/sdks/typescript/src/apache_beam/io/textio.ts)
+
+Reading and writing data in different file formats in Google Cloud Storage buckets is also supported in the AvroIO, XMLIO, TFRecordIO, and ParquetIO connectors.
+
+Specify the Google Cloud Storage path using the following format: `gs://<bucket_name>/<file_name>`. For example, `gs://my-bucket/my-file.txt`.
+You can use wildcards in read transforms and write transforms to read data from and write data to multiple files. For example, `gs://my-bucket/my-files-*.txt`.
+
+Apache Beam also supports working with the Google Cloud Storage file system directly. For example, you can use it to check whether a file exists, list files in a directory, or delete a file. The Google Cloud Storage file system is supported in the following languages:
+* [Java file system](https://beam.apache.org/releases/javadoc/current/org/apache/beam/sdk/extensions/gcp/storage/GcsFileSystemRegistrar.html)
+* [Python file system](https://beam.apache.org/releases/pydoc/current/apache_beam.io.gcp.gcsfilesystem.html)
+* [Go file system](https://pkg.go.dev/github.com/apache/beam/sdks/v2/go/pkg/beam/io/filesystem/gcs)
+
+
diff --git a/learning/prompts/documentation-lookup/24_io_json.md b/learning/prompts/documentation-lookup/24_io_json.md
new file mode 100644
index 000000000000..b1c2d89d7429
--- /dev/null
+++ b/learning/prompts/documentation-lookup/24_io_json.md
@@ -0,0 +1,32 @@
+Prompt:
+Is JSON supported in Apache Beam?
+
+Response:
+[JSON](https://www.json.org/) is a data format for storing and exchanging data. Apache Beam supports reading and writing JSON files using the `ReadFromJson` and `WriteToJson` transforms in the `TextIO` module.
For more information, see the TextIO connector documentation: +* [Java](https://beam.apache.org/releases/javadoc/current/org/apache/beam/sdk/io/TextIO.html) +* [Python](https://beam.apache.org/releases/pydoc/current/apache_beam.io.textio.html) +* [Go](https://pkg.go.dev/github.com/apache/beam/sdks/v2/go/pkg/beam/io/textio) +* [Typescript](https://github.com/apache/beam/blob/master/sdks/typescript/src/apache_beam/io/textio.ts) + +The [Dataflow cookbook repository](https://github.com/GoogleCloudPlatform/dataflow-cookbook) can help you to get started with JSON and Apache Beam. + +Apache Beam pipeline code for reading data from a JSON file might look like the following example: + +```python +class JsonOptions(PipelineOptions): + @classmethod + def _add_argparse_args(cls, parser): + parser.add_argument( + '--file_path', + default="gs://your-bucket/your-file.json", + help='Json file path' + ) + +options = JsonOptions() +with beam.Pipeline(options=options) as p: + output = (p | "Read from Json file" >> ReadFromJson( + path=options.file_path, + lines=False + ) + | "Log Data" >> Map(logging.info)) +``` \ No newline at end of file diff --git a/learning/prompts/documentation-lookup/25_io_csv.md b/learning/prompts/documentation-lookup/25_io_csv.md new file mode 100644 index 000000000000..8803fa28c799 --- /dev/null +++ b/learning/prompts/documentation-lookup/25_io_csv.md @@ -0,0 +1,32 @@ +Prompt: +Is CSV format supported in Apache Beam? + +Response: +[CSV](https://en.wikipedia.org/wiki/Comma-separated_values) is a data format for storing and exchanging data. Apache Beam supports reading data from and writing data to CSV files using the `ReadFromText` and `WriteToText` transforms in the `TextIO` module. For more information, see the TextIO connector documentation: +* [Java](https://beam.apache.org/releases/javadoc/current/org/apache/beam/sdk/io/TextIO.html) +* [Python](https://beam.apache.org/releases/pydoc/current/apache_beam.io.textio.html) +* [Go](https://pkg.go.dev/github.com/apache/beam/sdks/v2/go/pkg/beam/io/textio) +* [Typescript](https://github.com/apache/beam/blob/master/sdks/typescript/src/apache_beam/io/textio.ts) + +The [Dataflow cookbook repository](https://github.com/GoogleCloudPlatform/dataflow-cookbook) can help you to get started with CSV and Apache Beam. + +Apache Beam pipeline code for reading data from a CSV file might look like the following example: +```python +class CsvOptions(PipelineOptions): + @classmethod + def _add_argparse_args(cls, parser): + parser.add_argument( + '--file_path', + default="gs://your-bucket/your-file.csv", + help='Csv file path' + ) + +options = CsvOptions() + +with beam.Pipeline(options=options) as p: + output = (p | "Read from Csv file" >> ReadFromCsv( + path=options.file_path + ) + | "Log Data" >> Map(logging.info)) + +``` \ No newline at end of file diff --git a/learning/prompts/documentation-lookup/26_io_avro.md b/learning/prompts/documentation-lookup/26_io_avro.md new file mode 100644 index 000000000000..4161c47710c8 --- /dev/null +++ b/learning/prompts/documentation-lookup/26_io_avro.md @@ -0,0 +1,31 @@ +Prompt: +Is Apache Avro format supported in Apache Beam? + +Response: +[Apache Avro](https://avro.apache.org/) is a data format for storing and exchanging data. Apache Beam supports reading from and writing to Avro files using the `ReadFromAvro` and `WriteToAvro` transforms in the `AvroIO` module. 
For more information, see the AvroIO connector documentation:
+* [Java](https://beam.apache.org/releases/javadoc/current/org/apache/beam/sdk/extensions/avro/io/AvroIO.html)
+* [Python](https://beam.apache.org/releases/pydoc/current/apache_beam.io.avroio.html)
+* [Go](https://pkg.go.dev/github.com/apache/beam/sdks/v2/go/pkg/beam/io/avroio)
+* [Typescript (using cross-language transforms)](https://github.com/apache/beam/blob/master/sdks/typescript/src/apache_beam/io/avroio.ts)
+
+The [Dataflow cookbook repository](https://github.com/GoogleCloudPlatform/dataflow-cookbook) can help you to get started with Avro and Apache Beam.
+
+Apache Beam pipeline code for reading data from an Avro file might look like the following example:
+```python
+class ReadAvroOptions(PipelineOptions):
+
+    @classmethod
+    def _add_argparse_args(cls, parser):
+        parser.add_argument(
+            "--path",
+            default="gs://cloud-samples-data/bigquery/us-states/*.avro",
+            help="GCS path to read from")
+
+options = ReadAvroOptions()
+
+with beam.Pipeline(options=options) as p:
+
+    (p | "Read from Avro" >> ReadFromAvro(options.path)
+       | Map(logging.info))
+```
diff --git a/learning/prompts/documentation-lookup/27_io_parquet.md b/learning/prompts/documentation-lookup/27_io_parquet.md
new file mode 100644
index 000000000000..34ec68924218
--- /dev/null
+++ b/learning/prompts/documentation-lookup/27_io_parquet.md
@@ -0,0 +1,31 @@
+Prompt:
+Is Apache Parquet format supported in Apache Beam?
+
+Response:
+[Apache Parquet](https://parquet.apache.org/) is a data format for storing and exchanging data. Apache Beam supports reading from and writing to Parquet files using the `ReadFromParquet` and `WriteToParquet` transforms in the `ParquetIO` module. For more information, see the ParquetIO connector documentation:
+* [Java](https://beam.apache.org/releases/javadoc/current/org/apache/beam/sdk/io/parquet/ParquetIO.html)
+* [Python](https://beam.apache.org/releases/pydoc/current/apache_beam.io.parquetio.html)
+* [Go](https://pkg.go.dev/github.com/apache/beam/sdks/v2/go/pkg/beam/io/parquetio)
+* [Typescript (using cross-language transforms)](https://github.com/apache/beam/blob/master/sdks/typescript/src/apache_beam/io/parquetio.ts)
+
+The [Dataflow cookbook repository](https://github.com/GoogleCloudPlatform/dataflow-cookbook) can help you to get started with Parquet and Apache Beam.
+
+Apache Beam pipeline code for reading data from a Parquet file might look like the following example:
+```python
+class ReadParquetOptions(PipelineOptions):
+
+    @classmethod
+    def _add_argparse_args(cls, parser):
+        parser.add_argument(
+            "--path",
+            help="GCS path to Parquet file")
+
+options = ReadParquetOptions()
+
+with beam.Pipeline(options=options) as p:
+
+    (p | "ReadParquet" >> ReadFromParquet(options.path)
+       | Map(logging.info))
+```
diff --git a/learning/prompts/documentation-lookup/28_io_jdbc.md b/learning/prompts/documentation-lookup/28_io_jdbc.md
new file mode 100644
index 000000000000..13d6502c5bc8
--- /dev/null
+++ b/learning/prompts/documentation-lookup/28_io_jdbc.md
@@ -0,0 +1,52 @@
+Prompt:
+Is Oracle Java Database Connectivity (JDBC) supported in Apache Beam?
+
+Response:
+The [Oracle Java Database Connectivity (JDBC) API](https://docs.oracle.com/javase/8/docs/technotes/guides/jdbc/) is a Java API for connecting to relational databases. Apache Beam supports reading and writing data with JDBC using the `ReadFromJdbc` and `WriteToJdbc` transforms in the `JdbcIO` module.
For more information, see the JdbcIO connector documentation: +* [Java](https://beam.apache.org/releases/javadoc/current/org/apache/beam/sdk/io/jdbc/JdbcIO.html) +* [Python (using cross-language transforms)](https://beam.apache.org/releases/pydoc/current/apache_beam.io.jdbc.html) +* [Go (using cross-language transforms)](https://pkg.go.dev/github.com/apache/beam/sdks/v2/go/pkg/beam/io/xlang/jdbcio) + +The [Dataflow cookbook repository](https://github.com/GoogleCloudPlatform/dataflow-cookbook) can help you to get started with JDBC and Apache Beam. + +Apache Beam pipeline code for reading data with JDBC might look like the following example: +```python +class ExampleRow(NamedTuple): + id: int + name: str + + +class ReadJdbcOptions(PipelineOptions): + + @classmethod + def _add_argparse_args(cls, parser): + parser.add_argument( + "--driverClassName", + help="JDBC driver class name") + parser.add_argument( + "--url", + help="JDBC URL") + parser.add_argument( + "--username", + help="JDBC username") + parser.add_argument( + "--password", + help="JDBC password") + parser.add_argument( + "--query", + default="SELECT * FROM users", + help="JDBC query") + +options = ReadJdbcOptions() + +with beam.Pipeline(options=options) as p: + + (p | "Read from JDBC" >> ReadFromJdbc( + driverClassName=options.driverClassName, + url=options.url, + username=options.username, + password=options.password, + query=options.query) + | Map(logging.info)) +``` + diff --git a/runners/core-construction-java/build.gradle b/runners/core-construction-java/build.gradle index 622bd7048e4b..20b5d936ee83 100644 --- a/runners/core-construction-java/build.gradle +++ b/runners/core-construction-java/build.gradle @@ -54,7 +54,6 @@ dependencies { implementation project(path: ":model:job-management", configuration: "shadow") implementation project(path: ":sdks:java:core", configuration: "shadow") implementation project(path: ":sdks:java:extensions:avro") - implementation project(path: ":sdks:java:fn-execution") implementation project(path: ":sdks:java:transform-service:launcher") implementation library.java.vendored_grpc_1_60_1 implementation library.java.vendored_guava_32_1_2_jre diff --git a/runners/core-construction-java/src/main/java/org/apache/beam/runners/core/construction/SplittableParDo.java b/runners/core-construction-java/src/main/java/org/apache/beam/runners/core/construction/SplittableParDo.java index 9cf0606b68b9..5ea2c4968dd9 100644 --- a/runners/core-construction-java/src/main/java/org/apache/beam/runners/core/construction/SplittableParDo.java +++ b/runners/core-construction-java/src/main/java/org/apache/beam/runners/core/construction/SplittableParDo.java @@ -21,7 +21,6 @@ import com.google.auto.service.AutoService; import java.io.IOException; -import java.util.Collection; import java.util.List; import java.util.Map; import java.util.concurrent.ThreadLocalRandom; @@ -670,15 +669,6 @@ public void output(RestrictionT part) { public void outputWithTimestamp(RestrictionT part, Instant timestamp) { throw new UnsupportedOperationException(); } - - @Override - public void outputWindowedValue( - RestrictionT output, - Instant timestamp, - Collection windows, - PaneInfo paneInfo) { - throw new UnsupportedOperationException(); - } }; } diff --git a/runners/core-java/build.gradle b/runners/core-java/build.gradle index b7dd51e72ed1..499cf06fcd6e 100644 --- a/runners/core-java/build.gradle +++ b/runners/core-java/build.gradle @@ -43,7 +43,6 @@ dependencies { implementation project(path: ":sdks:java:core", configuration: "shadow") 
implementation project(path: ":model:job-management", configuration: "shadow") implementation project(":runners:core-construction-java") - implementation project(":sdks:java:fn-execution") implementation library.java.vendored_guava_32_1_2_jre implementation library.java.joda_time implementation library.java.vendored_grpc_1_60_1 diff --git a/runners/direct-java/build.gradle b/runners/direct-java/build.gradle index 89bf6b30ed9e..39fc1d2a53c9 100644 --- a/runners/direct-java/build.gradle +++ b/runners/direct-java/build.gradle @@ -26,7 +26,6 @@ def dependOnProjects = [":runners:core-construction-java", ":runners:core-java", ":runners:local-java", ":runners:java-fn-execution", - ":sdks:java:fn-execution", ":sdks:java:extensions:avro" ] @@ -43,7 +42,6 @@ applyJavaNature( } relocate "org.apache.beam.runners.core", getJavaRelocatedPath("runners.core") relocate "org.apache.beam.runners.fnexecution", getJavaRelocatedPath("runners.fnexecution") - relocate "org.apache.beam.sdk.fn", getJavaRelocatedPath("sdk.fn") relocate "org.apache.beam.runners.local", getJavaRelocatedPath("runners.local") }, ) @@ -96,7 +94,6 @@ dependencies { validatesRunner project(path: project.path, configuration: "shadowTest") permitUnusedDeclared library.java.vendored_grpc_1_60_1 permitUnusedDeclared project(":runners:java-fn-execution") - permitUnusedDeclared project(":sdks:java:fn-execution") permitUnusedDeclared project(":sdks:java:extensions:avro") examplesJavaIntegrationTest project(project.path) examplesJavaIntegrationTest project(":examples:java") diff --git a/runners/flink/flink_runner.gradle b/runners/flink/flink_runner.gradle index e7fd87067cdf..55303b3530d4 100644 --- a/runners/flink/flink_runner.gradle +++ b/runners/flink/flink_runner.gradle @@ -222,7 +222,6 @@ dependencies { implementation project(path: ":model:fn-execution", configuration: "shadow") implementation project(path: ":model:pipeline", configuration: "shadow") implementation project(path: ":model:job-management", configuration: "shadow") - implementation project(":sdks:java:fn-execution") implementation library.java.jackson_databind runtimeOnly library.java.jackson_jaxb_annotations examplesJavaIntegrationTest project(project.path) diff --git a/runners/flink/src/main/java/org/apache/beam/runners/flink/translation/wrappers/streaming/io/source/unbounded/FlinkUnboundedSourceReader.java b/runners/flink/src/main/java/org/apache/beam/runners/flink/translation/wrappers/streaming/io/source/unbounded/FlinkUnboundedSourceReader.java index 99160a9689eb..39ef63c8f7e9 100644 --- a/runners/flink/src/main/java/org/apache/beam/runners/flink/translation/wrappers/streaming/io/source/unbounded/FlinkUnboundedSourceReader.java +++ b/runners/flink/src/main/java/org/apache/beam/runners/flink/translation/wrappers/streaming/io/source/unbounded/FlinkUnboundedSourceReader.java @@ -69,6 +69,7 @@ public class FlinkUnboundedSourceReader // This name is defined in FLIP-33. @VisibleForTesting protected static final String PENDING_BYTES_METRIC_NAME = "pendingBytes"; private static final long SLEEP_ON_IDLE_MS = 50L; + private static final long MIN_WATERMARK_EMIT_INTERVAL_MS = 10L; private final AtomicReference> dataAvailableFutureRef; private final List readers; private int currentReaderIndex; @@ -103,22 +104,29 @@ public void start() { createPendingBytesGauge(context); Long watermarkInterval = pipelineOptions.as(FlinkPipelineOptions.class).getAutoWatermarkInterval(); - if (watermarkInterval != null) { - scheduleTaskAtFixedRate( - () -> { - // Set the watermark emission flag first. 
- shouldEmitWatermark = true; - // Wake up the main thread if necessary. - CompletableFuture f = dataAvailableFutureRef.get(); - if (f != DUMMY_FUTURE) { - f.complete(null); - } - }, - watermarkInterval, + if (watermarkInterval == null) { + watermarkInterval = + (pipelineOptions.as(FlinkPipelineOptions.class).getMaxBundleTimeMills()) / 5L; + watermarkInterval = + (watermarkInterval > MIN_WATERMARK_EMIT_INTERVAL_MS) + ? watermarkInterval + : MIN_WATERMARK_EMIT_INTERVAL_MS; + LOG.warn( + "AutoWatermarkInterval is not set, watermarks will be emitted at a default interval of {} ms", watermarkInterval); - } else { - LOG.warn("AutoWatermarkInterval is not set, watermarks won't be emitted."); } + scheduleTaskAtFixedRate( + () -> { + // Set the watermark emission flag first. + shouldEmitWatermark = true; + // Wake up the main thread if necessary. + CompletableFuture f = dataAvailableFutureRef.get(); + if (f != DUMMY_FUTURE) { + f.complete(null); + } + }, + watermarkInterval, + watermarkInterval); } @Override diff --git a/runners/google-cloud-dataflow-java/src/main/java/org/apache/beam/runners/dataflow/options/DataflowPipelineDebugOptions.java b/runners/google-cloud-dataflow-java/src/main/java/org/apache/beam/runners/dataflow/options/DataflowPipelineDebugOptions.java index 290418bd1cb7..e50a630c8a4a 100644 --- a/runners/google-cloud-dataflow-java/src/main/java/org/apache/beam/runners/dataflow/options/DataflowPipelineDebugOptions.java +++ b/runners/google-cloud-dataflow-java/src/main/java/org/apache/beam/runners/dataflow/options/DataflowPipelineDebugOptions.java @@ -258,7 +258,13 @@ public Dataflow create(PipelineOptions options) { @Default.Integer(60 * 1000) // 1 minute Integer getStreamingSideInputCacheExpirationMillis(); - void setstreamingSideInputCacheExpirationMillis(Integer value); + void setStreamingSideInputCacheExpirationMillis(Integer value); + + @Description("Number of commit threads used to commit items to streaming engine.") + @Default.Integer(1) + Integer getWindmillServiceCommitThreads(); + + void setWindmillServiceCommitThreads(Integer value); /** * The amount of time before UnboundedReaders are considered idle and closed during streaming diff --git a/runners/google-cloud-dataflow-java/worker/build.gradle b/runners/google-cloud-dataflow-java/worker/build.gradle index 6a15dbd5531b..4fcd49c4a243 100644 --- a/runners/google-cloud-dataflow-java/worker/build.gradle +++ b/runners/google-cloud-dataflow-java/worker/build.gradle @@ -126,10 +126,6 @@ applyJavaNature( relocate("org.apache.beam.runners.fnexecution", getWorkerRelocatedPath("org.apache.beam.runners.fnexecution")) relocate("org.apache.beam.repackaged.beam_runners_java_fn_execution", getWorkerRelocatedPath("org.apache.beam.repackaged.beam_runners_java_fn_execution")) - dependencies { - include(project(":sdks:java:fn-execution")) - } - relocate("org.apache.beam.sdk.fn", getWorkerRelocatedPath("org.apache.beam.sdk.fn")) relocate("org.apache.beam.repackaged.beam_sdks_java_fn_execution", getWorkerRelocatedPath("org.apache.beam.repackaged.beam_sdks_java_fn_execution")) dependencies { @@ -193,7 +189,6 @@ dependencies { implementation project(":runners:core-construction-java") implementation project(":runners:core-java") implementation project(":runners:java-fn-execution") - implementation project(":sdks:java:fn-execution") implementation project(path: ":runners:google-cloud-dataflow-java:worker:windmill", configuration: "shadow") shadow library.java.vendored_guava_32_1_2_jre diff --git 
a/runners/google-cloud-dataflow-java/worker/src/main/java/org/apache/beam/runners/dataflow/worker/DataflowBatchWorkerHarness.java b/runners/google-cloud-dataflow-java/worker/src/main/java/org/apache/beam/runners/dataflow/worker/DataflowBatchWorkerHarness.java index cc79ac6dbc0d..51127c2dc2fa 100644 --- a/runners/google-cloud-dataflow-java/worker/src/main/java/org/apache/beam/runners/dataflow/worker/DataflowBatchWorkerHarness.java +++ b/runners/google-cloud-dataflow-java/worker/src/main/java/org/apache/beam/runners/dataflow/worker/DataflowBatchWorkerHarness.java @@ -61,7 +61,7 @@ public static void main(String[] args) throws Exception { DataflowWorkerHarnessHelper.initializeLogging(DataflowBatchWorkerHarness.class); DataflowWorkerHarnessOptions pipelineOptions = DataflowWorkerHarnessHelper.initializeGlobalStateAndPipelineOptions( - DataflowBatchWorkerHarness.class); + DataflowBatchWorkerHarness.class, DataflowWorkerHarnessOptions.class); DataflowBatchWorkerHarness batchHarness = new DataflowBatchWorkerHarness(pipelineOptions); DataflowWorkerHarnessHelper.configureLogging(pipelineOptions); diff --git a/runners/google-cloud-dataflow-java/worker/src/main/java/org/apache/beam/runners/dataflow/worker/DataflowWorkUnitClient.java b/runners/google-cloud-dataflow-java/worker/src/main/java/org/apache/beam/runners/dataflow/worker/DataflowWorkUnitClient.java index bb39e3bd9af4..f3caa8d0f3ac 100644 --- a/runners/google-cloud-dataflow-java/worker/src/main/java/org/apache/beam/runners/dataflow/worker/DataflowWorkUnitClient.java +++ b/runners/google-cloud-dataflow-java/worker/src/main/java/org/apache/beam/runners/dataflow/worker/DataflowWorkUnitClient.java @@ -29,6 +29,7 @@ import com.google.api.services.dataflow.Dataflow; import com.google.api.services.dataflow.model.LeaseWorkItemRequest; import com.google.api.services.dataflow.model.LeaseWorkItemResponse; +import com.google.api.services.dataflow.model.PerWorkerMetrics; import com.google.api.services.dataflow.model.ReportWorkItemStatusRequest; import com.google.api.services.dataflow.model.ReportWorkItemStatusResponse; import com.google.api.services.dataflow.model.SendWorkerMessagesRequest; @@ -292,13 +293,30 @@ public WorkerMessage createWorkerMessageFromStreamingScalingReport( return msg; } - /** Reports the autoscaling signals to dataflow */ @Override - public void reportWorkerMessage(WorkerMessage msg) throws IOException { + public WorkerMessage createWorkerMessageFromPerWorkerMetrics(PerWorkerMetrics report) { + DateTime endTime = DateTime.now(); + logger.debug("Reporting WorkMessageResponse"); + Map labels = + ImmutableMap.of("JOB_ID", options.getJobId(), "WORKER_ID", options.getWorkerId()); + WorkerMessage msg = + new WorkerMessage() + .setTime(toCloudTime(endTime)) + .setPerWorkerMetrics(report) + .setLabels(labels); + return msg; + } + + /** + * Reports the worker messages to dataflow. We currently report autoscaling signals and + * perworkermetrics with this path. 
+ */ + @Override + public void reportWorkerMessage(List messages) throws IOException { SendWorkerMessagesRequest request = new SendWorkerMessagesRequest() .setLocation(options.getRegion()) - .setWorkerMessages(Collections.singletonList(msg)); + .setWorkerMessages(messages); SendWorkerMessagesResponse result = dataflow .projects() diff --git a/runners/google-cloud-dataflow-java/worker/src/main/java/org/apache/beam/runners/dataflow/worker/DataflowWorkerHarnessHelper.java b/runners/google-cloud-dataflow-java/worker/src/main/java/org/apache/beam/runners/dataflow/worker/DataflowWorkerHarnessHelper.java index c6d8d727ef4d..94c894608a47 100644 --- a/runners/google-cloud-dataflow-java/worker/src/main/java/org/apache/beam/runners/dataflow/worker/DataflowWorkerHarnessHelper.java +++ b/runners/google-cloud-dataflow-java/worker/src/main/java/org/apache/beam/runners/dataflow/worker/DataflowWorkerHarnessHelper.java @@ -49,11 +49,11 @@ public final class DataflowWorkerHarnessHelper { private static final String ROOT_LOGGER_NAME = ""; private static final String PIPELINE_PATH = "PIPELINE_PATH"; - public static DataflowWorkerHarnessOptions initializeGlobalStateAndPipelineOptions( - Class workerHarnessClass) throws Exception { + public static T initializeGlobalStateAndPipelineOptions( + Class workerHarnessClass, Class harnessOptionsClass) throws Exception { /* Extract pipeline options. */ - DataflowWorkerHarnessOptions pipelineOptions = - WorkerPipelineOptionsFactory.createFromSystemProperties(); + T pipelineOptions = + WorkerPipelineOptionsFactory.createFromSystemProperties(harnessOptionsClass); pipelineOptions.setAppName(workerHarnessClass.getSimpleName()); /* Configure logging with job-specific properties. */ diff --git a/runners/google-cloud-dataflow-java/worker/src/main/java/org/apache/beam/runners/dataflow/worker/MetricsToPerStepNamespaceMetricsConverter.java b/runners/google-cloud-dataflow-java/worker/src/main/java/org/apache/beam/runners/dataflow/worker/MetricsToPerStepNamespaceMetricsConverter.java index f3f31ccfd36f..7b72d6507788 100644 --- a/runners/google-cloud-dataflow-java/worker/src/main/java/org/apache/beam/runners/dataflow/worker/MetricsToPerStepNamespaceMetricsConverter.java +++ b/runners/google-cloud-dataflow-java/worker/src/main/java/org/apache/beam/runners/dataflow/worker/MetricsToPerStepNamespaceMetricsConverter.java @@ -52,17 +52,14 @@ private static Optional convertCounterToMetricValue( return Optional.empty(); } - BigQuerySinkMetrics.ParsedMetricName labeledName = - BigQuerySinkMetrics.parseMetricName(metricName.getName()); - if (labeledName == null || labeledName.getBaseName().isEmpty()) { - return Optional.empty(); - } - - return Optional.of( - new MetricValue() - .setMetric(labeledName.getBaseName()) - .setMetricLabels(labeledName.getMetricLabels()) - .setValueInt64(value)); + return BigQuerySinkMetrics.parseMetricName(metricName.getName()) + .filter(labeledName -> !labeledName.getBaseName().isEmpty()) + .map( + labeledName -> + new MetricValue() + .setMetric(labeledName.getBaseName()) + .setMetricLabels(labeledName.getMetricLabels()) + .setValueInt64(value)); } /** @@ -78,9 +75,9 @@ private static Optional convertHistogramToMetricValue( return Optional.empty(); } - BigQuerySinkMetrics.ParsedMetricName labeledName = + Optional labeledName = BigQuerySinkMetrics.parseMetricName(metricName.getName()); - if (labeledName == null || labeledName.getBaseName().isEmpty()) { + if (!labeledName.isPresent() || labeledName.get().getBaseName().isEmpty()) { return Optional.empty(); } @@ -133,8 
+130,8 @@ private static Optional convertHistogramToMetricValue( return Optional.of( new MetricValue() - .setMetric(labeledName.getBaseName()) - .setMetricLabels(labeledName.getMetricLabels()) + .setMetric(labeledName.get().getBaseName()) + .setMetricLabels(labeledName.get().getMetricLabels()) .setValueHistogram(histogramValue)); } diff --git a/runners/google-cloud-dataflow-java/worker/src/main/java/org/apache/beam/runners/dataflow/worker/StreamingDataflowWorker.java b/runners/google-cloud-dataflow-java/worker/src/main/java/org/apache/beam/runners/dataflow/worker/StreamingDataflowWorker.java index d915b77995db..463ab953faee 100644 --- a/runners/google-cloud-dataflow-java/worker/src/main/java/org/apache/beam/runners/dataflow/worker/StreamingDataflowWorker.java +++ b/runners/google-cloud-dataflow-java/worker/src/main/java/org/apache/beam/runners/dataflow/worker/StreamingDataflowWorker.java @@ -23,12 +23,15 @@ import com.google.api.services.dataflow.model.CounterUpdate; import com.google.api.services.dataflow.model.MapTask; +import com.google.api.services.dataflow.model.PerStepNamespaceMetrics; +import com.google.api.services.dataflow.model.PerWorkerMetrics; import com.google.api.services.dataflow.model.Status; import com.google.api.services.dataflow.model.StreamingComputationConfig; import com.google.api.services.dataflow.model.StreamingConfigTask; import com.google.api.services.dataflow.model.StreamingScalingReport; import com.google.api.services.dataflow.model.WorkItem; import com.google.api.services.dataflow.model.WorkItemStatus; +import com.google.api.services.dataflow.model.WorkerMessage; import edu.umd.cs.findbugs.annotations.SuppressFBWarnings; import java.io.File; import java.io.IOException; @@ -61,7 +64,6 @@ import org.apache.beam.runners.core.metrics.MetricsLogger; import org.apache.beam.runners.dataflow.DataflowRunner; import org.apache.beam.runners.dataflow.internal.CustomSources; -import org.apache.beam.runners.dataflow.options.DataflowWorkerHarnessOptions; import org.apache.beam.runners.dataflow.util.CloudObject; import org.apache.beam.runners.dataflow.util.CloudObjects; import org.apache.beam.runners.dataflow.worker.DataflowSystemMetrics.StreamingSystemCounterNames; @@ -117,6 +119,7 @@ import org.apache.beam.sdk.fn.IdGenerators; import org.apache.beam.sdk.fn.JvmInitializers; import org.apache.beam.sdk.io.FileSystems; +import org.apache.beam.sdk.io.gcp.bigquery.BigQuerySinkMetrics; import org.apache.beam.sdk.metrics.MetricName; import org.apache.beam.sdk.metrics.MetricsEnvironment; import org.apache.beam.sdk.util.BackOff; @@ -131,11 +134,7 @@ import org.apache.beam.vendor.guava.v32_1_2_jre.com.google.common.base.Splitter; import org.apache.beam.vendor.guava.v32_1_2_jre.com.google.common.cache.Cache; import org.apache.beam.vendor.guava.v32_1_2_jre.com.google.common.cache.CacheBuilder; -import org.apache.beam.vendor.guava.v32_1_2_jre.com.google.common.collect.EvictingQueue; -import org.apache.beam.vendor.guava.v32_1_2_jre.com.google.common.collect.ImmutableMap; -import org.apache.beam.vendor.guava.v32_1_2_jre.com.google.common.collect.Iterables; -import org.apache.beam.vendor.guava.v32_1_2_jre.com.google.common.collect.ListMultimap; -import org.apache.beam.vendor.guava.v32_1_2_jre.com.google.common.collect.MultimapBuilder; +import org.apache.beam.vendor.guava.v32_1_2_jre.com.google.common.collect.*; import org.apache.beam.vendor.guava.v32_1_2_jre.com.google.common.graph.MutableNetwork; import org.apache.beam.vendor.guava.v32_1_2_jre.com.google.common.net.HostAndPort; import 
org.apache.beam.vendor.guava.v32_1_2_jre.com.google.common.util.concurrent.ThreadFactoryBuilder; @@ -227,7 +226,7 @@ public class StreamingDataflowWorker { private final BoundedQueueExecutor workUnitExecutor; private final WindmillServerStub windmillServer; private final Thread dispatchThread; - private final Thread commitThread; + @VisibleForTesting final ImmutableList commitThreads; private final AtomicLong activeCommitBytes = new AtomicLong(); private final AtomicLong previousTimeAtMaxThreads = new AtomicLong(); private final AtomicBoolean running = new AtomicBoolean(); @@ -405,21 +404,28 @@ public void run() { dispatchThread.setPriority(Thread.MIN_PRIORITY); dispatchThread.setName("DispatchThread"); - commitThread = - new Thread( - new Runnable() { - @Override - public void run() { + int numCommitThreads = 1; + if (windmillServiceEnabled && options.getWindmillServiceCommitThreads() > 0) { + numCommitThreads = options.getWindmillServiceCommitThreads(); + } + + ImmutableList.Builder commitThreadsBuilder = ImmutableList.builder(); + for (int i = 0; i < numCommitThreads; ++i) { + Thread commitThread = + new Thread( + () -> { if (windmillServiceEnabled) { streamingCommitLoop(); } else { commitLoop(); } - } - }); - commitThread.setDaemon(true); - commitThread.setPriority(Thread.MAX_PRIORITY); - commitThread.setName("CommitThread"); + }); + commitThread.setDaemon(true); + commitThread.setPriority(Thread.MAX_PRIORITY); + commitThread.setName("CommitThread " + i); + commitThreadsBuilder.add(commitThread); + } + commitThreads = commitThreadsBuilder.build(); this.publishCounters = publishCounters; this.windmillServer = options.getWindmillServerStub(); @@ -465,9 +471,9 @@ public static void main(String[] args) throws Exception { JvmInitializers.runOnStartup(); DataflowWorkerHarnessHelper.initializeLogging(StreamingDataflowWorker.class); - DataflowWorkerHarnessOptions options = + StreamingDataflowWorkerOptions options = DataflowWorkerHarnessHelper.initializeGlobalStateAndPipelineOptions( - StreamingDataflowWorker.class); + StreamingDataflowWorker.class, StreamingDataflowWorkerOptions.class); DataflowWorkerHarnessHelper.configureLogging(options); checkArgument( options.isStreaming(), @@ -479,31 +485,33 @@ public static void main(String[] args) throws Exception { "%s cannot be main() class with beam_fn_api enabled", StreamingDataflowWorker.class.getSimpleName()); - StreamingDataflowWorker worker = - StreamingDataflowWorker.fromDataflowWorkerHarnessOptions(options); + StreamingDataflowWorker worker = StreamingDataflowWorker.fromOptions(options); // Use the MetricsLogger container which is used by BigQueryIO to periodically log process-wide // metrics. MetricsEnvironment.setProcessWideContainer(new MetricsLogger(null)); - // When enabled, the Pipeline will record Per-Worker metrics that will be piped to WMW. + // When enabled, the Pipeline will record Per-Worker metrics that will be piped to DFE. StreamingStepMetricsContainer.setEnablePerWorkerMetrics( options.isEnableStreamingEngine() && DataflowRunner.hasExperiment(options, "enable_per_worker_metrics")); + // StreamingStepMetricsContainer automatically deletes perWorkerCounters if they are zero-valued + // for longer than 5 minutes. 
+ BigQuerySinkMetrics.setSupportMetricsDeletion(true); JvmInitializers.runBeforeProcessing(options); worker.startStatusPages(); worker.start(); } - public static StreamingDataflowWorker fromDataflowWorkerHarnessOptions( - DataflowWorkerHarnessOptions options) throws IOException { + public static StreamingDataflowWorker fromOptions(StreamingDataflowWorkerOptions options) + throws IOException { return new StreamingDataflowWorker( Collections.emptyList(), IntrinsicMapTaskExecutorFactory.defaultFactory(), new DataflowWorkUnitClient(options, LOG), - options.as(StreamingDataflowWorkerOptions.class), + options, true, new HotKeyLogger(), Instant::now, @@ -578,7 +586,7 @@ public void start() { memoryMonitorThread.start(); dispatchThread.start(); - commitThread.start(); + commitThreads.forEach(Thread::start); sampler.start(); // Periodically report workers counters and other updates. @@ -710,10 +718,12 @@ public void stop() { running.set(false); dispatchThread.interrupt(); dispatchThread.join(); - // We need to interrupt the commitThread in case it is blocking on pulling + // We need to interrupt the commitThreads in case they are blocking on pulling // from the commitQueue. - commitThread.interrupt(); - commitThread.join(); + commitThreads.forEach(Thread::interrupt); + for (Thread commitThread : commitThreads) { + commitThread.join(); + } memoryMonitor.stop(); memoryMonitorThread.join(); workUnitExecutor.shutdown(); @@ -1367,9 +1377,9 @@ private void commitLoop() { } Windmill.CommitWorkRequest commitRequest = commitRequestBuilder.build(); LOG.trace("Commit: {}", commitRequest); - activeCommitBytes.set(commitBytes); + activeCommitBytes.addAndGet(commitBytes); windmillServer.commitWork(commitRequest); - activeCommitBytes.set(0); + activeCommitBytes.addAndGet(-commitBytes); for (Map.Entry entry : computationRequestMap.entrySet()) { ComputationState computationState = entry.getKey(); @@ -1385,12 +1395,21 @@ private void commitLoop() { // Adds the commit to the commitStream if it fits, returning true iff it is consumed. private boolean addCommitToStream(Commit commit, CommitWorkStream commitStream) { Preconditions.checkNotNull(commit); + final ComputationState state = commit.computationState(); + final Windmill.WorkItemCommitRequest request = commit.request(); // Drop commits for failed work. Such commits will be dropped by Windmill anyway. if (commit.work().isFailed()) { + readerCache.invalidateReader( + WindmillComputationKey.create( + state.getComputationId(), request.getKey(), request.getShardingKey())); + stateCache + .forComputation(state.getComputationId()) + .invalidate(request.getKey(), request.getShardingKey()); + state.completeWorkAndScheduleNextWorkForKey( + ShardedKey.create(request.getKey(), request.getShardingKey()), request.getWorkToken()); return true; } - final ComputationState state = commit.computationState(); - final Windmill.WorkItemCommitRequest request = commit.request(); + final int size = commit.getSize(); commit.work().setState(Work.State.COMMITTING); activeCommitBytes.addAndGet(size); @@ -1407,8 +1426,6 @@ private boolean addCommitToStream(Commit commit, CommitWorkStream commitStream) .invalidate(request.getKey(), request.getShardingKey()); } activeCommitBytes.addAndGet(-size); - // This may throw an exception if the commit was not active, which is possible if it - // was deemed stuck. 
state.completeWorkAndScheduleNextWorkForKey( ShardedKey.create(request.getKey(), request.getShardingKey()), request.getWorkToken()); @@ -1777,7 +1794,7 @@ private void updateThreadMetrics() { maxOutstandingBundles.addValue((long) workUnitExecutor.maximumElementsOutstanding()); } - private void sendWorkerMessage() throws IOException { + private WorkerMessage createWorkerMessageForStreamingScalingReport() { StreamingScalingReport activeThreadsReport = new StreamingScalingReport() .setActiveThreadCount(workUnitExecutor.activeCount()) @@ -1786,8 +1803,33 @@ private void sendWorkerMessage() throws IOException { .setMaximumThreadCount(chooseMaximumNumberOfThreads()) .setMaximumBundleCount(workUnitExecutor.maximumElementsOutstanding()) .setMaximumBytes(workUnitExecutor.maximumBytesOutstanding()); - workUnitClient.reportWorkerMessage( - workUnitClient.createWorkerMessageFromStreamingScalingReport(activeThreadsReport)); + return workUnitClient.createWorkerMessageFromStreamingScalingReport(activeThreadsReport); + } + + private Optional createWorkerMessageForPerWorkerMetrics() { + List metrics = new ArrayList<>(); + stageInfoMap.values().forEach(s -> metrics.addAll(s.extractPerWorkerMetricValues())); + + if (metrics.isEmpty()) { + return Optional.empty(); + } + + PerWorkerMetrics perWorkerMetrics = new PerWorkerMetrics().setPerStepNamespaceMetrics(metrics); + return Optional.of(workUnitClient.createWorkerMessageFromPerWorkerMetrics(perWorkerMetrics)); + } + + private void sendWorkerMessage() throws IOException { + List workerMessages = new ArrayList(2); + workerMessages.add(createWorkerMessageForStreamingScalingReport()); + + if (StreamingStepMetricsContainer.getEnablePerWorkerMetrics()) { + Optional metricsMsg = createWorkerMessageForPerWorkerMetrics(); + if (metricsMsg.isPresent()) { + workerMessages.add(metricsMsg.get()); + } + } + + workUnitClient.reportWorkerMessage(workerMessages); } @VisibleForTesting diff --git a/runners/google-cloud-dataflow-java/worker/src/main/java/org/apache/beam/runners/dataflow/worker/StreamingStepMetricsContainer.java b/runners/google-cloud-dataflow-java/worker/src/main/java/org/apache/beam/runners/dataflow/worker/StreamingStepMetricsContainer.java index af3b3e51071f..54a3ef49776b 100644 --- a/runners/google-cloud-dataflow-java/worker/src/main/java/org/apache/beam/runners/dataflow/worker/StreamingStepMetricsContainer.java +++ b/runners/google-cloud-dataflow-java/worker/src/main/java/org/apache/beam/runners/dataflow/worker/StreamingStepMetricsContainer.java @@ -98,7 +98,10 @@ protected StreamingStepMetricsContainer createContainer(String stepName) { }; } - // Testing constructor. + /** + * Construct a {@code StreamingStepMetricsContainer} that supports mock clock, perWorkerCounters, + * and perWorkerCountersByFirstStaleTime. For testing purposes only. 
+ */ private StreamingStepMetricsContainer( String stepName, Map perWorkerCountersByFirstStaleTime, @@ -218,6 +221,9 @@ public static void setEnablePerWorkerMetrics(Boolean enablePerWorkerMetrics) { StreamingStepMetricsContainer.enablePerWorkerMetrics = enablePerWorkerMetrics; } + public static boolean getEnablePerWorkerMetrics() { + return StreamingStepMetricsContainer.enablePerWorkerMetrics; + } /** * Updates {@code perWorkerCountersByFirstStaleTime} with the current zero-valued metrics and * removes metrics that have been stale for longer than {@code maximumPerWorkerCounterStaleness} diff --git a/runners/google-cloud-dataflow-java/worker/src/main/java/org/apache/beam/runners/dataflow/worker/WorkUnitClient.java b/runners/google-cloud-dataflow-java/worker/src/main/java/org/apache/beam/runners/dataflow/worker/WorkUnitClient.java index 6f26f4048070..f9637a375ed5 100644 --- a/runners/google-cloud-dataflow-java/worker/src/main/java/org/apache/beam/runners/dataflow/worker/WorkUnitClient.java +++ b/runners/google-cloud-dataflow-java/worker/src/main/java/org/apache/beam/runners/dataflow/worker/WorkUnitClient.java @@ -17,12 +17,14 @@ */ package org.apache.beam.runners.dataflow.worker; +import com.google.api.services.dataflow.model.PerWorkerMetrics; import com.google.api.services.dataflow.model.StreamingScalingReport; import com.google.api.services.dataflow.model.WorkItem; import com.google.api.services.dataflow.model.WorkItemServiceState; import com.google.api.services.dataflow.model.WorkItemStatus; import com.google.api.services.dataflow.model.WorkerMessage; import java.io.IOException; +import java.util.List; import java.util.Optional; /** Abstract base class describing a client for WorkItem work units. */ @@ -61,9 +63,18 @@ interface WorkUnitClient { WorkerMessage createWorkerMessageFromStreamingScalingReport(StreamingScalingReport report); /** - * Reports the autoscaling signals with a {@link StreamingScalingReport}. + * Creates a {@link WorkerMessage} containing the given PerWorkerMetrics * - * @param msg the WorkerMessage to report + * @param metrics Metric updates to send to the backend. + * @return a {@link WorkerMessage} + */ + WorkerMessage createWorkerMessageFromPerWorkerMetrics(PerWorkerMetrics metrics); + + /** + * Reports the worker messages to dataflow. We currently report autoscaling signals and + * perworkermetrics with this path. + * + * @param msg the WorkerMessages to report */ - void reportWorkerMessage(WorkerMessage msg) throws IOException; + void reportWorkerMessage(List messages) throws IOException; } diff --git a/runners/google-cloud-dataflow-java/worker/src/main/java/org/apache/beam/runners/dataflow/worker/WorkerPipelineOptionsFactory.java b/runners/google-cloud-dataflow-java/worker/src/main/java/org/apache/beam/runners/dataflow/worker/WorkerPipelineOptionsFactory.java index 0929705c8941..a3ec8933c331 100644 --- a/runners/google-cloud-dataflow-java/worker/src/main/java/org/apache/beam/runners/dataflow/worker/WorkerPipelineOptionsFactory.java +++ b/runners/google-cloud-dataflow-java/worker/src/main/java/org/apache/beam/runners/dataflow/worker/WorkerPipelineOptionsFactory.java @@ -46,17 +46,16 @@ public class WorkerPipelineOptionsFactory { * @return A {@link DataflowWorkerHarnessOptions} object configured for the Dataflow worker * harness. 
*/ - public static DataflowWorkerHarnessOptions createFromSystemProperties() throws IOException { + public static T createFromSystemProperties( + Class harnessOptionsClass) throws IOException { ObjectMapper objectMapper = new ObjectMapper(); - DataflowWorkerHarnessOptions options; + T options; if (System.getProperties().containsKey("sdk_pipeline_options")) { // TODO: remove this method of getting pipeline options, once migration is complete. String serializedOptions = System.getProperty("sdk_pipeline_options"); LOG.info("Worker harness starting with: {}", serializedOptions); options = - objectMapper - .readValue(serializedOptions, PipelineOptions.class) - .as(DataflowWorkerHarnessOptions.class); + objectMapper.readValue(serializedOptions, PipelineOptions.class).as(harnessOptionsClass); } else if (System.getProperties().containsKey("sdk_pipeline_options_file")) { String filePath = System.getProperty("sdk_pipeline_options_file"); LOG.info("Loading pipeline options from " + filePath); @@ -64,12 +63,10 @@ public static DataflowWorkerHarnessOptions createFromSystemProperties() throws I new String(Files.readAllBytes(Paths.get(filePath)), StandardCharsets.UTF_8); LOG.info("Worker harness starting with: " + serializedOptions); options = - objectMapper - .readValue(serializedOptions, PipelineOptions.class) - .as(DataflowWorkerHarnessOptions.class); + objectMapper.readValue(serializedOptions, PipelineOptions.class).as(harnessOptionsClass); } else { LOG.info("Using empty PipelineOptions, as none were provided."); - options = PipelineOptionsFactory.as(DataflowWorkerHarnessOptions.class); + options = PipelineOptionsFactory.as(harnessOptionsClass); } // These values will not be known at job submission time and must be provided. diff --git a/runners/google-cloud-dataflow-java/worker/src/main/java/org/apache/beam/runners/dataflow/worker/streaming/ActiveWorkState.java b/runners/google-cloud-dataflow-java/worker/src/main/java/org/apache/beam/runners/dataflow/worker/streaming/ActiveWorkState.java index 54942dfeee1f..ff46356d9569 100644 --- a/runners/google-cloud-dataflow-java/worker/src/main/java/org/apache/beam/runners/dataflow/worker/streaming/ActiveWorkState.java +++ b/runners/google-cloud-dataflow-java/worker/src/main/java/org/apache/beam/runners/dataflow/worker/streaming/ActiveWorkState.java @@ -188,16 +188,13 @@ synchronized Optional completeWorkAndGetNextWorkForKey( private synchronized void removeCompletedWorkFromQueue( Queue workQueue, ShardedKey shardedKey, long workToken) { - // avoid Preconditions.checkState here to prevent eagerly evaluating the - // format string parameters for the error message. - Work completedWork = - Optional.ofNullable(workQueue.peek()) - .orElseThrow( - () -> - new IllegalStateException( - String.format( - "Active key %s without work, expected token %d", - shardedKey, workToken))); + Work completedWork = workQueue.peek(); + if (completedWork == null) { + // Work may have been completed due to clearing of stuck commits. + LOG.warn( + String.format("Active key %s without work, expected token %d", shardedKey, workToken)); + return; + } if (completedWork.getWorkItem().getWorkToken() != workToken) { // Work may have been completed due to clearing of stuck commits. 
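As a rough illustration of the batched worker-message path introduced in the hunks above: a minimal sketch, assuming the WorkUnitClient interface from this patch. The sketch class, the client parameter, and the literal thread counts are illustrative assumptions, not part of the change.

package org.apache.beam.runners.dataflow.worker;

import com.google.api.services.dataflow.model.PerWorkerMetrics;
import com.google.api.services.dataflow.model.StreamingScalingReport;
import com.google.api.services.dataflow.model.WorkerMessage;
import java.io.IOException;
import java.util.ArrayList;
import java.util.List;
import java.util.Optional;

// Illustrative sketch only: batches the autoscaling signal and optional per-worker
// metrics into a single reportWorkerMessage call, mirroring sendWorkerMessage above.
class WorkerMessageBatchSketch {
  static void reportOnce(WorkUnitClient client, Optional<PerWorkerMetrics> perWorkerMetrics)
      throws IOException {
    List<WorkerMessage> messages = new ArrayList<>(2);
    // The autoscaling signal is always sent; the counts here are placeholders.
    StreamingScalingReport report =
        new StreamingScalingReport().setActiveThreadCount(10).setMaximumThreadCount(100);
    messages.add(client.createWorkerMessageFromStreamingScalingReport(report));
    // Per-worker metrics ride along only when there is something to report.
    perWorkerMetrics.ifPresent(
        m -> messages.add(client.createWorkerMessageFromPerWorkerMetrics(m)));
    // One RPC now carries every message type.
    client.reportWorkerMessage(messages);
  }
}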
diff --git a/runners/google-cloud-dataflow-java/worker/src/main/java/org/apache/beam/runners/dataflow/worker/streaming/StageInfo.java b/runners/google-cloud-dataflow-java/worker/src/main/java/org/apache/beam/runners/dataflow/worker/streaming/StageInfo.java index b514dfc84bb9..64c97dcac516 100644 --- a/runners/google-cloud-dataflow-java/worker/src/main/java/org/apache/beam/runners/dataflow/worker/streaming/StageInfo.java +++ b/runners/google-cloud-dataflow-java/worker/src/main/java/org/apache/beam/runners/dataflow/worker/streaming/StageInfo.java @@ -21,6 +21,7 @@ import com.google.api.services.dataflow.model.CounterStructuredName; import com.google.api.services.dataflow.model.CounterUpdate; +import com.google.api.services.dataflow.model.PerStepNamespaceMetrics; import com.google.auto.value.AutoValue; import java.util.ArrayList; import java.util.List; @@ -111,4 +112,12 @@ private void translateKnownStepCounters(CounterUpdate stepCounterUpdate) { } } } + + public List extractPerWorkerMetricValues() { + List metrics = new ArrayList<>(); + Iterables.addAll( + metrics, + StreamingStepMetricsContainer.extractPerWorkerMetricUpdates(metricsContainerRegistry())); + return metrics; + } } diff --git a/runners/google-cloud-dataflow-java/worker/src/test/java/org/apache/beam/runners/dataflow/worker/DataflowWorkUnitClientTest.java b/runners/google-cloud-dataflow-java/worker/src/test/java/org/apache/beam/runners/dataflow/worker/DataflowWorkUnitClientTest.java index 7019217b9fbd..fac56890f498 100644 --- a/runners/google-cloud-dataflow-java/worker/src/test/java/org/apache/beam/runners/dataflow/worker/DataflowWorkUnitClientTest.java +++ b/runners/google-cloud-dataflow-java/worker/src/test/java/org/apache/beam/runners/dataflow/worker/DataflowWorkUnitClientTest.java @@ -18,11 +18,7 @@ package org.apache.beam.runners.dataflow.worker; import static org.junit.Assert.assertEquals; -import static org.mockito.ArgumentMatchers.anyString; -import static org.mockito.Mockito.doCallRealMethod; -import static org.mockito.Mockito.when; -import com.google.api.client.http.LowLevelHttpResponse; import com.google.api.client.json.Json; import com.google.api.client.testing.http.MockHttpTransport; import com.google.api.client.testing.http.MockLowLevelHttpRequest; @@ -31,6 +27,9 @@ import com.google.api.services.dataflow.model.LeaseWorkItemRequest; import com.google.api.services.dataflow.model.LeaseWorkItemResponse; import com.google.api.services.dataflow.model.MapTask; +import com.google.api.services.dataflow.model.MetricValue; +import com.google.api.services.dataflow.model.PerStepNamespaceMetrics; +import com.google.api.services.dataflow.model.PerWorkerMetrics; import com.google.api.services.dataflow.model.SendWorkerMessagesRequest; import com.google.api.services.dataflow.model.SendWorkerMessagesResponse; import com.google.api.services.dataflow.model.SeqMapTask; @@ -38,6 +37,7 @@ import com.google.api.services.dataflow.model.WorkItem; import com.google.api.services.dataflow.model.WorkerMessage; import java.io.IOException; +import java.util.Collections; import java.util.Optional; import org.apache.beam.runners.dataflow.options.DataflowWorkerHarnessOptions; import org.apache.beam.runners.dataflow.worker.logging.DataflowWorkerLoggingMDC; @@ -49,7 +49,6 @@ import org.apache.beam.sdk.util.FastNanoClockAndSleeper; import org.apache.beam.vendor.guava.v32_1_2_jre.com.google.common.collect.ImmutableList; import org.apache.beam.vendor.guava.v32_1_2_jre.com.google.common.collect.Lists; -import org.junit.Before; import org.junit.Rule; import 
org.junit.Test; import org.junit.rules.ExpectedException; @@ -57,8 +56,6 @@ import org.junit.rules.Timeout; import org.junit.runner.RunWith; import org.junit.runners.JUnit4; -import org.mockito.Mock; -import org.mockito.MockitoAnnotations; import org.slf4j.Logger; import org.slf4j.LoggerFactory; @@ -71,36 +68,34 @@ public class DataflowWorkUnitClientTest { private static final String PROJECT_ID = "TEST_PROJECT_ID"; private static final String JOB_ID = "TEST_JOB_ID"; private static final String WORKER_ID = "TEST_WORKER_ID"; + @Rule public TestRule restoreSystemProperties = new RestoreSystemProperties(); @Rule public TestRule restoreLogging = new RestoreDataflowLoggingMDC(); @Rule public ExpectedException expectedException = ExpectedException.none(); @Rule public FastNanoClockAndSleeper fastNanoClockAndSleeper = new FastNanoClockAndSleeper(); - @Mock private MockHttpTransport transport; - @Mock private MockLowLevelHttpRequest request; - private DataflowWorkerHarnessOptions pipelineOptions; - - @Before - public void setUp() throws Exception { - MockitoAnnotations.initMocks(this); - when(transport.buildRequest(anyString(), anyString())).thenReturn(request); - doCallRealMethod().when(request).getContentAsString(); + DataflowWorkerHarnessOptions createPipelineOptionsWithTransport(MockHttpTransport transport) { Dataflow service = new Dataflow(transport, Transport.getJsonFactory(), null); - pipelineOptions = PipelineOptionsFactory.as(DataflowWorkerHarnessOptions.class); + DataflowWorkerHarnessOptions pipelineOptions = + PipelineOptionsFactory.as(DataflowWorkerHarnessOptions.class); pipelineOptions.setProject(PROJECT_ID); pipelineOptions.setJobId(JOB_ID); pipelineOptions.setWorkerId(WORKER_ID); pipelineOptions.setGcpCredential(new TestCredential()); pipelineOptions.setDataflowClient(service); pipelineOptions.setRegion("us-central1"); + return pipelineOptions; } @Test public void testCloudServiceCall() throws Exception { WorkItem workItem = createWorkItem(PROJECT_ID, JOB_ID); - when(request.execute()).thenReturn(generateMockResponse(workItem)); - + MockLowLevelHttpResponse response = generateMockResponse(workItem); + MockLowLevelHttpRequest request = new MockLowLevelHttpRequest().setResponse(response); + MockHttpTransport transport = + new MockHttpTransport.Builder().setLowLevelHttpRequest(request).build(); + DataflowWorkerHarnessOptions pipelineOptions = createPipelineOptionsWithTransport(transport); WorkUnitClient client = new DataflowWorkUnitClient(pipelineOptions, LOG); assertEquals(Optional.of(workItem), client.getWorkItem()); @@ -120,30 +115,40 @@ public void testCloudServiceCall() throws Exception { @Test public void testCloudServiceCallMapTaskStagePropagation() throws Exception { - WorkUnitClient client = new DataflowWorkUnitClient(pipelineOptions, LOG); - // Publish and acquire a map task work item, and verify we're now processing that stage. 
final String stageName = "test_stage_name"; MapTask mapTask = new MapTask(); mapTask.setStageName(stageName); WorkItem workItem = createWorkItem(PROJECT_ID, JOB_ID); workItem.setMapTask(mapTask); - when(request.execute()).thenReturn(generateMockResponse(workItem)); + + MockLowLevelHttpResponse response = generateMockResponse(workItem); + MockLowLevelHttpRequest request = new MockLowLevelHttpRequest().setResponse(response); + MockHttpTransport transport = + new MockHttpTransport.Builder().setLowLevelHttpRequest(request).build(); + DataflowWorkerHarnessOptions pipelineOptions = createPipelineOptionsWithTransport(transport); + WorkUnitClient client = new DataflowWorkUnitClient(pipelineOptions, LOG); + assertEquals(Optional.of(workItem), client.getWorkItem()); assertEquals(stageName, DataflowWorkerLoggingMDC.getStageName()); } @Test public void testCloudServiceCallSeqMapTaskStagePropagation() throws Exception { - WorkUnitClient client = new DataflowWorkUnitClient(pipelineOptions, LOG); - // Publish and acquire a seq map task work item, and verify we're now processing that stage. final String stageName = "test_stage_name"; SeqMapTask seqMapTask = new SeqMapTask(); seqMapTask.setStageName(stageName); WorkItem workItem = createWorkItem(PROJECT_ID, JOB_ID); workItem.setSeqMapTask(seqMapTask); - when(request.execute()).thenReturn(generateMockResponse(workItem)); + + MockLowLevelHttpResponse response = generateMockResponse(workItem); + MockLowLevelHttpRequest request = new MockLowLevelHttpRequest().setResponse(response); + MockHttpTransport transport = + new MockHttpTransport.Builder().setLowLevelHttpRequest(request).build(); + DataflowWorkerHarnessOptions pipelineOptions = createPipelineOptionsWithTransport(transport); + WorkUnitClient client = new DataflowWorkUnitClient(pipelineOptions, LOG); + assertEquals(Optional.of(workItem), client.getWorkItem()); assertEquals(stageName, DataflowWorkerLoggingMDC.getStageName()); } @@ -153,8 +158,11 @@ public void testCloudServiceCallNoWorkPresent() throws Exception { // If there's no work the service should return an empty work item. 
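A side note on the test refactor above: the Mockito transport mock is replaced with the google-http-client testing helpers. The sketch below shows that canned-response pattern in isolation; the helper class and method names are invented for illustration and are not part of the patch.

import com.google.api.client.json.Json;
import com.google.api.client.testing.http.MockHttpTransport;
import com.google.api.client.testing.http.MockLowLevelHttpRequest;
import com.google.api.client.testing.http.MockLowLevelHttpResponse;

// Illustrative sketch only: builds a transport whose every request gets one canned
// JSON response, the pattern the refactored DataflowWorkUnitClientTest cases rely on.
class MockTransportSketch {
  static MockHttpTransport transportReturning(String jsonBody) {
    MockLowLevelHttpResponse response = new MockLowLevelHttpResponse();
    response.setContentType(Json.MEDIA_TYPE);
    response.setContent(jsonBody);
    MockLowLevelHttpRequest request = new MockLowLevelHttpRequest().setResponse(response);
    return new MockHttpTransport.Builder().setLowLevelHttpRequest(request).build();
  }
}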
WorkItem workItem = new WorkItem(); - when(request.execute()).thenReturn(generateMockResponse(workItem)); - + MockLowLevelHttpResponse response = generateMockResponse(workItem); + MockLowLevelHttpRequest request = new MockLowLevelHttpRequest().setResponse(response); + MockHttpTransport transport = + new MockHttpTransport.Builder().setLowLevelHttpRequest(request).build(); + DataflowWorkerHarnessOptions pipelineOptions = createPipelineOptionsWithTransport(transport); WorkUnitClient client = new DataflowWorkUnitClient(pipelineOptions, LOG); assertEquals(Optional.empty(), client.getWorkItem()); @@ -177,8 +185,11 @@ public void testCloudServiceCallNoWorkId() throws Exception { WorkItem workItem = createWorkItem(PROJECT_ID, JOB_ID); workItem.setId(null); - when(request.execute()).thenReturn(generateMockResponse(workItem)); - + MockLowLevelHttpResponse response = generateMockResponse(workItem); + MockLowLevelHttpRequest request = new MockLowLevelHttpRequest().setResponse(response); + MockHttpTransport transport = + new MockHttpTransport.Builder().setLowLevelHttpRequest(request).build(); + DataflowWorkerHarnessOptions pipelineOptions = createPipelineOptionsWithTransport(transport); WorkUnitClient client = new DataflowWorkUnitClient(pipelineOptions, LOG); assertEquals(Optional.empty(), client.getWorkItem()); @@ -197,8 +208,11 @@ public void testCloudServiceCallNoWorkId() throws Exception { @Test public void testCloudServiceCallNoWorkItem() throws Exception { - when(request.execute()).thenReturn(generateMockResponse()); - + MockLowLevelHttpResponse response = generateMockResponse(); + MockLowLevelHttpRequest request = new MockLowLevelHttpRequest().setResponse(response); + MockHttpTransport transport = + new MockHttpTransport.Builder().setLowLevelHttpRequest(request).build(); + DataflowWorkerHarnessOptions pipelineOptions = createPipelineOptionsWithTransport(transport); WorkUnitClient client = new DataflowWorkUnitClient(pipelineOptions, LOG); assertEquals(Optional.empty(), client.getWorkItem()); @@ -224,21 +238,30 @@ public void testCloudServiceCallMultipleWorkItems() throws Exception { WorkItem workItem1 = createWorkItem(PROJECT_ID, JOB_ID); WorkItem workItem2 = createWorkItem(PROJECT_ID, JOB_ID); - when(request.execute()).thenReturn(generateMockResponse(workItem1, workItem2)); - + MockLowLevelHttpResponse response = generateMockResponse(workItem1, workItem2); + MockLowLevelHttpRequest request = new MockLowLevelHttpRequest().setResponse(response); + MockHttpTransport transport = + new MockHttpTransport.Builder().setLowLevelHttpRequest(request).build(); + DataflowWorkerHarnessOptions pipelineOptions = createPipelineOptionsWithTransport(transport); WorkUnitClient client = new DataflowWorkUnitClient(pipelineOptions, LOG); client.getWorkItem(); } @Test - public void testReportWorkerMessage() throws Exception { + public void testReportWorkerMessage_streamingScalingReport() throws Exception { MockLowLevelHttpResponse response = new MockLowLevelHttpResponse(); response.setContentType(Json.MEDIA_TYPE); SendWorkerMessagesResponse workerMessage = new SendWorkerMessagesResponse(); workerMessage.setFactory(Transport.getJsonFactory()); response.setContent(workerMessage.toPrettyString()); - when(request.execute()).thenReturn(response); + + MockLowLevelHttpRequest request = new MockLowLevelHttpRequest().setResponse(response); + MockHttpTransport transport = + new MockHttpTransport.Builder().setLowLevelHttpRequest(request).build(); + DataflowWorkerHarnessOptions pipelineOptions = 
createPipelineOptionsWithTransport(transport); + WorkUnitClient client = new DataflowWorkUnitClient(pipelineOptions, LOG); + StreamingScalingReport activeThreadsReport = new StreamingScalingReport() .setActiveThreadCount(1) @@ -247,9 +270,8 @@ public void testReportWorkerMessage() throws Exception { .setMaximumThreadCount(4) .setMaximumBundleCount(5) .setMaximumBytes(6L); - WorkUnitClient client = new DataflowWorkUnitClient(pipelineOptions, LOG); WorkerMessage msg = client.createWorkerMessageFromStreamingScalingReport(activeThreadsReport); - client.reportWorkerMessage(msg); + client.reportWorkerMessage(Collections.singletonList(msg)); SendWorkerMessagesRequest actualRequest = Transport.getJsonFactory() @@ -257,7 +279,41 @@ public void testReportWorkerMessage() throws Exception { assertEquals(ImmutableList.of(msg), actualRequest.getWorkerMessages()); } - private LowLevelHttpResponse generateMockResponse(WorkItem... workItems) throws Exception { + @Test + public void testReportWorkerMessage_perWorkerMetrics() throws Exception { + MockLowLevelHttpResponse response = new MockLowLevelHttpResponse(); + response.setContentType(Json.MEDIA_TYPE); + SendWorkerMessagesResponse workerMessage = new SendWorkerMessagesResponse(); + workerMessage.setFactory(Transport.getJsonFactory()); + response.setContent(workerMessage.toPrettyString()); + + MockLowLevelHttpRequest request = new MockLowLevelHttpRequest().setResponse(response); + MockHttpTransport transport = + new MockHttpTransport.Builder().setLowLevelHttpRequest(request).build(); + DataflowWorkerHarnessOptions pipelineOptions = createPipelineOptionsWithTransport(transport); + WorkUnitClient client = new DataflowWorkUnitClient(pipelineOptions, LOG); + + PerStepNamespaceMetrics stepNamespaceMetrics = + new PerStepNamespaceMetrics() + .setOriginalStep("s1") + .setMetricsNamespace("ns") + .setMetricValues( + Collections.singletonList(new MetricValue().setMetric("metric").setValueInt64(3L))); + PerWorkerMetrics perWorkerMetrics = + new PerWorkerMetrics() + .setPerStepNamespaceMetrics(Collections.singletonList(stepNamespaceMetrics)); + + WorkerMessage perWorkerMetricsMsg = + client.createWorkerMessageFromPerWorkerMetrics(perWorkerMetrics); + client.reportWorkerMessage(Collections.singletonList(perWorkerMetricsMsg)); + + SendWorkerMessagesRequest actualRequest = + Transport.getJsonFactory() + .fromString(request.getContentAsString(), SendWorkerMessagesRequest.class); + assertEquals(ImmutableList.of(perWorkerMetricsMsg), actualRequest.getWorkerMessages()); + } + + private MockLowLevelHttpResponse generateMockResponse(WorkItem... 
workItems) throws Exception { MockLowLevelHttpResponse response = new MockLowLevelHttpResponse(); response.setContentType(Json.MEDIA_TYPE); LeaseWorkItemResponse lease = new LeaseWorkItemResponse(); diff --git a/runners/google-cloud-dataflow-java/worker/src/test/java/org/apache/beam/runners/dataflow/worker/DataflowWorkerHarnessHelperTest.java b/runners/google-cloud-dataflow-java/worker/src/test/java/org/apache/beam/runners/dataflow/worker/DataflowWorkerHarnessHelperTest.java index de9d9cf7d155..c46a112c4e77 100644 --- a/runners/google-cloud-dataflow-java/worker/src/test/java/org/apache/beam/runners/dataflow/worker/DataflowWorkerHarnessHelperTest.java +++ b/runners/google-cloud-dataflow-java/worker/src/test/java/org/apache/beam/runners/dataflow/worker/DataflowWorkerHarnessHelperTest.java @@ -29,6 +29,7 @@ import org.apache.beam.model.pipeline.v1.Endpoints; import org.apache.beam.runners.dataflow.options.DataflowWorkerHarnessOptions; import org.apache.beam.runners.dataflow.worker.logging.DataflowWorkerLoggingMDC; +import org.apache.beam.runners.dataflow.worker.options.StreamingDataflowWorkerOptions; import org.apache.beam.runners.dataflow.worker.testing.RestoreDataflowLoggingMDC; import org.apache.beam.sdk.options.PipelineOptionsFactory; import org.apache.beam.sdk.testing.RestoreSystemProperties; @@ -63,7 +64,7 @@ public void testLoggingConfiguration() throws Exception { DataflowWorkerHarnessOptions generatedOptions = DataflowWorkerHarnessHelper.initializeGlobalStateAndPipelineOptions( - DataflowBatchWorkerHarnessTest.class); + DataflowBatchWorkerHarnessTest.class, DataflowWorkerHarnessOptions.class); // Assert that the returned options are correct. assertThat(generatedOptions.getJobId(), equalTo(JOB_ID)); assertThat(generatedOptions.getWorkerId(), equalTo(WORKER_ID)); @@ -88,4 +89,31 @@ public void testParseDescriptor() throws TextFormat.ParseException { public void testParseStatusApiDescriptor() throws TextFormat.ParseException { assertNull(DataflowWorkerHarnessHelper.getStatusDescriptor()); } + + @Test + public void testStreamingStreamingConfiguration() throws Exception { + StreamingDataflowWorkerOptions pipelineOptions = + PipelineOptionsFactory.as(StreamingDataflowWorkerOptions.class); + pipelineOptions.setJobId(JOB_ID); + pipelineOptions.setWorkerId(WORKER_ID); + int activeWorkRefreshPeriodMillis = 12345; + pipelineOptions.setActiveWorkRefreshPeriodMillis(activeWorkRefreshPeriodMillis); + int stuckCommitDurationMillis = 23456; + pipelineOptions.setStuckCommitDurationMillis(stuckCommitDurationMillis); + String serializedOptions = new ObjectMapper().writeValueAsString(pipelineOptions); + File file = tmpFolder.newFile(); + Files.write(Paths.get(file.getPath()), serializedOptions.getBytes(StandardCharsets.UTF_8)); + System.setProperty("sdk_pipeline_options_file", file.getPath()); + + StreamingDataflowWorkerOptions generatedOptions = + DataflowWorkerHarnessHelper.initializeGlobalStateAndPipelineOptions( + DataflowBatchWorkerHarnessTest.class, StreamingDataflowWorkerOptions.class); + // Assert that the returned options are correct. 
+ assertThat(generatedOptions.getJobId(), equalTo(JOB_ID)); + assertThat(generatedOptions.getWorkerId(), equalTo(WORKER_ID)); + assertThat( + generatedOptions.getActiveWorkRefreshPeriodMillis(), + equalTo(activeWorkRefreshPeriodMillis)); + assertThat(generatedOptions.getStuckCommitDurationMillis(), equalTo(stuckCommitDurationMillis)); + } } diff --git a/runners/google-cloud-dataflow-java/worker/src/test/java/org/apache/beam/runners/dataflow/worker/MetricsToPerStepNamespaceMetricsConverterTest.java b/runners/google-cloud-dataflow-java/worker/src/test/java/org/apache/beam/runners/dataflow/worker/MetricsToPerStepNamespaceMetricsConverterTest.java index a37bf643b7ee..0a4cd06cf8ca 100644 --- a/runners/google-cloud-dataflow-java/worker/src/test/java/org/apache/beam/runners/dataflow/worker/MetricsToPerStepNamespaceMetricsConverterTest.java +++ b/runners/google-cloud-dataflow-java/worker/src/test/java/org/apache/beam/runners/dataflow/worker/MetricsToPerStepNamespaceMetricsConverterTest.java @@ -81,7 +81,7 @@ public void testConvert_successfulyConvertCounters() { Map counters = new HashMap(); MetricName bigQueryMetric1 = MetricName.named("BigQuerySink", "metric1"); MetricName bigQueryMetric2 = - MetricName.named("BigQuerySink", "metric2-label1:val1;label2:val2;"); + MetricName.named("BigQuerySink", "metric2*label1:val1;label2:val2;"); MetricName bigQueryMetric3 = MetricName.named("BigQuerySink", "zeroValue"); counters.put(bigQueryMetric1, 5L); @@ -112,14 +112,14 @@ public void testConvert_successfulyConvertCounters() { @Test public void testConvert_skipInvalidMetricNames() { Map counters = new HashMap<>(); - MetricName bigQueryMetric1 = MetricName.named("BigQuerySink", "invalid-metric-name1"); - counters.put(bigQueryMetric1, 5L); + MetricName invalidName1 = MetricName.named("BigQuerySink", "**"); + counters.put(invalidName1, 5L); Map histograms = new HashMap<>(); - MetricName bigQueryMetric2 = MetricName.named("BigQuerySink", "invalid-metric-name2"); + MetricName invalidName2 = MetricName.named("BigQuerySink", "****"); HistogramData nonEmptyLinearHistogram = HistogramData.linear(0, 10, 10); nonEmptyLinearHistogram.record(-5.0); - histograms.put(bigQueryMetric2, nonEmptyLinearHistogram); + histograms.put(invalidName2, nonEmptyLinearHistogram); Collection conversionResult = MetricsToPerStepNamespaceMetricsConverter.convert("testStep", counters, histograms); @@ -131,7 +131,7 @@ public void testConvert_successfulConvertHistograms() { Map histograms = new HashMap(); MetricName bigQueryMetric1 = MetricName.named("BigQuerySink", "baseLabel"); MetricName bigQueryMetric2 = - MetricName.named("BigQuerySink", "baseLabel-label1:val1;label2:val2;"); + MetricName.named("BigQuerySink", "baseLabel*label1:val1;label2:val2;"); MetricName bigQueryMetric3 = MetricName.named("BigQuerySink", "zeroValue"); HistogramData nonEmptyLinearHistogram = HistogramData.linear(0, 10, 10); @@ -235,10 +235,10 @@ public void testConvert_convertCountersAndHistograms() { Map counters = new HashMap<>(); Map histograms = new HashMap(); - MetricName counterMetricName = MetricName.named("BigQuerySink", "counter-label1:val1;"); + MetricName counterMetricName = MetricName.named("BigQuerySink", "counter*label1:val1;"); counters.put(counterMetricName, 3L); - MetricName histogramMetricName = MetricName.named("BigQuerySink", "histogram-label2:val2;"); + MetricName histogramMetricName = MetricName.named("BigQuerySink", "histogram*label2:val2;"); HistogramData linearHistogram = HistogramData.linear(0, 10, 10); linearHistogram.record(5.0); 
histograms.put(histogramMetricName, linearHistogram); diff --git a/runners/google-cloud-dataflow-java/worker/src/test/java/org/apache/beam/runners/dataflow/worker/StreamingDataflowWorkerTest.java b/runners/google-cloud-dataflow-java/worker/src/test/java/org/apache/beam/runners/dataflow/worker/StreamingDataflowWorkerTest.java index 5de67a371a65..e7eedcf3780a 100644 --- a/runners/google-cloud-dataflow-java/worker/src/test/java/org/apache/beam/runners/dataflow/worker/StreamingDataflowWorkerTest.java +++ b/runners/google-cloud-dataflow-java/worker/src/test/java/org/apache/beam/runners/dataflow/worker/StreamingDataflowWorkerTest.java @@ -798,8 +798,7 @@ public void testBasicHarness() throws Exception { verify(hotKeyLogger, atLeastOnce()).logHotKeyDetection(nullable(String.class), any()); } - @Test - public void testBasic() throws Exception { + private void runTestBasic(int numCommitThreads) throws Exception { List instructions = Arrays.asList( makeSourceInstruction(StringUtf8Coder.of()), @@ -817,6 +816,7 @@ public void testBasic() throws Exception { when(mockWorkUnitClient.getGlobalStreamingConfigWorkItem()).thenReturn(Optional.of(workItem)); StreamingDataflowWorkerOptions options = createTestingPipelineOptions(server); + options.setWindmillServiceCommitThreads(numCommitThreads); StreamingDataflowWorker worker = makeWorker(instructions, options, true /* publishCounters */); worker.start(); @@ -838,6 +838,16 @@ public void testBasic() throws Exception { verify(hotKeyLogger, atLeastOnce()).logHotKeyDetection(nullable(String.class), any()); } + @Test + public void testBasic() throws Exception { + runTestBasic(1); + } + + @Test + public void testBasicWithMultipleCommitThreads() throws Exception { + runTestBasic(2); + } + @Test public void testHotKeyLogging() throws Exception { // This is to test that the worker can correctly log the key from a hot key. 
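Looking back at the MetricsToPerStepNamespaceMetricsConverter tests above, the fixtures switch the label delimiter from '-' to '*', so a metric name such as baseLabel*label1:val1;label2:val2; carries its labels inline. The helper below is an illustrative sketch of that assumed encoding only; the real parsing lives in the converter and may differ.

import java.util.LinkedHashMap;
import java.util.Map;

// Illustrative sketch only: splits an encoded metric name into a base name and labels,
// assuming the "base*key1:val1;key2:val2;" form exercised by the tests above.
class LabeledMetricNameSketch {
  final String baseName;
  final Map<String, String> labels = new LinkedHashMap<>();

  LabeledMetricNameSketch(String encoded) {
    int star = encoded.indexOf('*');
    baseName = star < 0 ? encoded : encoded.substring(0, star);
    if (star >= 0) {
      for (String pair : encoded.substring(star + 1).split(";")) {
        int colon = pair.indexOf(':');
        if (colon > 0) {
          labels.put(pair.substring(0, colon), pair.substring(colon + 1));
        }
      }
    }
  }
}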
@@ -3832,6 +3842,38 @@ public void testStuckCommit() throws Exception { removeDynamicFields(result.get(1L))); } + private void runNumCommitThreadsTest(int configNumCommitThreads, int expectedNumCommitThreads) + throws Exception { + List instructions = + Arrays.asList( + makeSourceInstruction(StringUtf8Coder.of()), + makeSinkInstruction(StringUtf8Coder.of(), 0)); + FakeWindmillServer server = new FakeWindmillServer(errorCollector); + StreamingDataflowWorkerOptions options = createTestingPipelineOptions(server); + options.setWindmillServiceCommitThreads(configNumCommitThreads); + StreamingDataflowWorker worker = makeWorker(instructions, options, true /* publishCounters */); + worker.start(); + assertEquals(expectedNumCommitThreads, worker.commitThreads.size()); + worker.stop(); + } + + @Test + public void testDefaultNumCommitThreads() throws Exception { + if (streamingEngine) { + runNumCommitThreadsTest(1, 1); + runNumCommitThreadsTest(2, 2); + runNumCommitThreadsTest(3, 3); + runNumCommitThreadsTest(0, 1); + runNumCommitThreadsTest(-1, 1); + } else { + runNumCommitThreadsTest(1, 1); + runNumCommitThreadsTest(2, 1); + runNumCommitThreadsTest(3, 1); + runNumCommitThreadsTest(0, 1); + runNumCommitThreadsTest(-1, 1); + } + } + static class BlockingFn extends DoFn implements TestRule { public static CountDownLatch blocker = new CountDownLatch(1); diff --git a/runners/google-cloud-dataflow-java/worker/src/test/java/org/apache/beam/runners/dataflow/worker/StreamingStepMetricsContainerTest.java b/runners/google-cloud-dataflow-java/worker/src/test/java/org/apache/beam/runners/dataflow/worker/StreamingStepMetricsContainerTest.java index cc0722e7a8e6..c586f2b87814 100644 --- a/runners/google-cloud-dataflow-java/worker/src/test/java/org/apache/beam/runners/dataflow/worker/StreamingStepMetricsContainerTest.java +++ b/runners/google-cloud-dataflow-java/worker/src/test/java/org/apache/beam/runners/dataflow/worker/StreamingStepMetricsContainerTest.java @@ -226,10 +226,10 @@ public void testPerWorkerMetrics() { @Test public void testExtractPerWorkerMetricUpdates() { StreamingStepMetricsContainer.setEnablePerWorkerMetrics(true); - MetricName counterMetricName = MetricName.named("BigQuerySink", "counter-"); + MetricName counterMetricName = MetricName.named("BigQuerySink", "counter"); c1.getPerWorkerCounter(counterMetricName).inc(3); - MetricName histogramMetricName = MetricName.named("BigQuerySink", "histogram-"); + MetricName histogramMetricName = MetricName.named("BigQuerySink", "histogram"); HistogramData.LinearBuckets linearBuckets = HistogramData.LinearBuckets.of(0, 10, 10); c2.getPerWorkerHistogram(histogramMetricName, linearBuckets).update(5.0); diff --git a/runners/google-cloud-dataflow-java/worker/src/test/java/org/apache/beam/runners/dataflow/worker/WorkerPipelineOptionsFactoryTest.java b/runners/google-cloud-dataflow-java/worker/src/test/java/org/apache/beam/runners/dataflow/worker/WorkerPipelineOptionsFactoryTest.java index f8684edfa2e4..62d38d434b8f 100644 --- a/runners/google-cloud-dataflow-java/worker/src/test/java/org/apache/beam/runners/dataflow/worker/WorkerPipelineOptionsFactoryTest.java +++ b/runners/google-cloud-dataflow-java/worker/src/test/java/org/apache/beam/runners/dataflow/worker/WorkerPipelineOptionsFactoryTest.java @@ -52,7 +52,7 @@ public void testCreationFromSystemProperties() throws Exception { @SuppressWarnings("deprecation") // testing deprecated functionality DataflowWorkerHarnessOptions options = - WorkerPipelineOptionsFactory.createFromSystemProperties(); + 
WorkerPipelineOptionsFactory.createFromSystemProperties(DataflowWorkerHarnessOptions.class); assertEquals("test_worker_id", options.getWorkerId()); assertEquals("test_job_id", options.getJobId()); assertEquals(999, options.getNumWorkers()); @@ -74,7 +74,7 @@ public void testCreationWithPipelineOptionsFile() throws Exception { @SuppressWarnings("deprecation") // testing deprecated functionality DataflowWorkerHarnessOptions options = - WorkerPipelineOptionsFactory.createFromSystemProperties(); + WorkerPipelineOptionsFactory.createFromSystemProperties(DataflowWorkerHarnessOptions.class); assertEquals("test_worker_id_2", options.getWorkerId()); assertEquals("test_job_id_2", options.getJobId()); assertEquals(1000, options.getNumWorkers()); diff --git a/runners/java-fn-execution/build.gradle b/runners/java-fn-execution/build.gradle index e055bc991e2a..5e19e4cba102 100644 --- a/runners/java-fn-execution/build.gradle +++ b/runners/java-fn-execution/build.gradle @@ -30,7 +30,6 @@ dependencies { implementation project(path: ":model:pipeline", configuration: "shadow") implementation project(path: ":model:fn-execution", configuration: "shadow") implementation project(path: ":sdks:java:core", configuration: "shadow") - implementation project(":sdks:java:fn-execution") implementation project(":runners:core-construction-java") implementation library.java.vendored_grpc_1_60_1 implementation library.java.slf4j_api diff --git a/runners/java-job-service/build.gradle b/runners/java-job-service/build.gradle index a1b4aba2e908..e3d26ece4765 100644 --- a/runners/java-job-service/build.gradle +++ b/runners/java-job-service/build.gradle @@ -29,7 +29,6 @@ dependencies { implementation project(path: ":sdks:java:core", configuration: "shadow") implementation project(path: ":model:job-management", configuration: "shadow") implementation project(":sdks:java:expansion-service") - implementation project(":sdks:java:fn-execution") implementation project(":runners:core-construction-java") implementation project(":runners:java-fn-execution") implementation library.java.jackson_core diff --git a/runners/samza/build.gradle b/runners/samza/build.gradle index 4e410fedeccc..fbbec322c87b 100644 --- a/runners/samza/build.gradle +++ b/runners/samza/build.gradle @@ -70,7 +70,6 @@ dependencies { implementation project(path: ":model:fn-execution", configuration: "shadow") implementation project(path: ":model:job-management", configuration: "shadow") implementation project(path: ":model:pipeline", configuration: "shadow") - implementation project(":sdks:java:fn-execution") testImplementation project(path: ":sdks:java:core", configuration: "shadowTest") testImplementation project(path: ":runners:core-java", configuration: "testRuntimeMigration") testImplementation library.java.hamcrest diff --git a/runners/spark/spark_runner.gradle b/runners/spark/spark_runner.gradle index 3da819ad2f37..5e74ccd2b195 100644 --- a/runners/spark/spark_runner.gradle +++ b/runners/spark/spark_runner.gradle @@ -172,7 +172,6 @@ dependencies { implementation library.java.args4j implementation project(path: ":model:fn-execution", configuration: "shadow") implementation project(path: ":model:job-management", configuration: "shadow") - implementation project(":sdks:java:fn-execution") implementation library.java.vendored_grpc_1_60_1 implementation library.java.vendored_guava_32_1_2_jre spark.components.each { component -> diff --git a/scripts/ci/pr-bot/updateReviewers.ts b/scripts/ci/pr-bot/updateReviewers.ts index c49f8feba24f..96eaa6e22f63 100644 --- 
a/scripts/ci/pr-bot/updateReviewers.ts +++ b/scripts/ci/pr-bot/updateReviewers.ts @@ -104,6 +104,7 @@ async function getReviewersForPull(pull: any): Promise { for (const comment of comments) { if ( + comment.user && comment.user.login && comment.user.login !== pull.user.login && comment.user.login !== BOT_NAME diff --git a/sdks/go.mod b/sdks/go.mod index 1c5874a49c1e..8683c181a99c 100644 --- a/sdks/go.mod +++ b/sdks/go.mod @@ -28,11 +28,11 @@ require ( cloud.google.com/go/datastore v1.15.0 cloud.google.com/go/profiler v0.4.0 cloud.google.com/go/pubsub v1.33.0 - cloud.google.com/go/spanner v1.53.1 + cloud.google.com/go/spanner v1.56.0 cloud.google.com/go/storage v1.36.0 - github.com/aws/aws-sdk-go-v2 v1.24.0 + github.com/aws/aws-sdk-go-v2 v1.24.1 github.com/aws/aws-sdk-go-v2/config v1.26.2 - github.com/aws/aws-sdk-go-v2/credentials v1.16.13 + github.com/aws/aws-sdk-go-v2/credentials v1.16.16 github.com/aws/aws-sdk-go-v2/feature/s3/manager v1.13.8 github.com/aws/aws-sdk-go-v2/service/s3 v1.42.2 github.com/aws/smithy-go v1.19.0 @@ -45,24 +45,24 @@ require ( github.com/johannesboyne/gofakes3 v0.0.0-20221110173912-32fb85c5aed6 github.com/lib/pq v1.10.9 github.com/linkedin/goavro/v2 v2.12.0 - github.com/nats-io/nats-server/v2 v2.10.7 - github.com/nats-io/nats.go v1.31.0 + github.com/nats-io/nats-server/v2 v2.10.10 + github.com/nats-io/nats.go v1.32.0 github.com/proullon/ramsql v0.1.3 github.com/spf13/cobra v1.8.0 github.com/testcontainers/testcontainers-go v0.26.0 - github.com/tetratelabs/wazero v1.5.0 + github.com/tetratelabs/wazero v1.6.0 github.com/xitongsys/parquet-go v1.6.2 github.com/xitongsys/parquet-go-source v0.0.0-20220315005136-aec0fe3e777c go.mongodb.org/mongo-driver v1.13.1 - golang.org/x/net v0.19.0 - golang.org/x/oauth2 v0.15.0 - golang.org/x/sync v0.5.0 - golang.org/x/sys v0.15.0 + golang.org/x/net v0.20.0 + golang.org/x/oauth2 v0.16.0 + golang.org/x/sync v0.6.0 + golang.org/x/sys v0.16.0 golang.org/x/text v0.14.0 - google.golang.org/api v0.154.0 - google.golang.org/genproto v0.0.0-20231120223509-83a465c0220f - google.golang.org/grpc v1.60.1 - google.golang.org/protobuf v1.31.0 + google.golang.org/api v0.157.0 + google.golang.org/genproto v0.0.0-20240116215550-a9fa1716bcac + google.golang.org/grpc v1.61.0 + google.golang.org/protobuf v1.32.0 gopkg.in/retry.v1 v1.0.3 gopkg.in/yaml.v2 v2.4.0 gopkg.in/yaml.v3 v3.0.1 @@ -75,19 +75,27 @@ require ( require ( dario.cat/mergo v1.0.0 // indirect - github.com/Microsoft/hcsshim v0.11.1 // indirect + github.com/Microsoft/hcsshim v0.11.4 // indirect github.com/containerd/log v0.1.0 // indirect + github.com/davecgh/go-spew v1.1.1 // indirect + github.com/frankban/quicktest v1.14.0 // indirect github.com/go-logr/logr v1.3.0 // indirect github.com/go-logr/stdr v1.2.2 // indirect github.com/go-ole/go-ole v1.2.6 // indirect + github.com/json-iterator/go v1.1.12 // indirect github.com/lufia/plan9stats v0.0.0-20211012122336-39d0f177ccd0 // indirect github.com/minio/highwayhash v1.0.2 // indirect + github.com/modern-go/concurrent v0.0.0-20180306012644-bacd9c7ef1dd // indirect + github.com/modern-go/reflect2 v1.0.2 // indirect github.com/nats-io/jwt/v2 v2.5.3 // indirect - github.com/nats-io/nkeys v0.4.6 // indirect + github.com/nats-io/nkeys v0.4.7 // indirect github.com/nats-io/nuid v1.0.1 // indirect + github.com/pmezard/go-difflib v1.0.0 // indirect github.com/power-devops/perfstat v0.0.0-20210106213030-5aafc221ea8c // indirect github.com/shirou/gopsutil/v3 v3.23.9 // indirect github.com/shoenig/go-m1cpu v0.1.6 // indirect + 
github.com/stretchr/objx v0.5.0 // indirect + github.com/stretchr/testify v1.8.4 // indirect github.com/tklauser/go-sysconf v0.3.12 // indirect github.com/tklauser/numcpus v0.6.1 // indirect github.com/yusufpapurcu/wmi v1.2.3 // indirect @@ -95,12 +103,13 @@ require ( go.opentelemetry.io/contrib/instrumentation/net/http/otelhttp v0.46.1 // indirect go.opentelemetry.io/otel v1.21.0 // indirect go.opentelemetry.io/otel/metric v1.21.0 // indirect + go.opentelemetry.io/otel/sdk v1.21.0 // indirect go.opentelemetry.io/otel/trace v1.21.0 // indirect golang.org/x/time v0.5.0 // indirect ) require ( - cloud.google.com/go v0.110.10 // indirect + cloud.google.com/go v0.112.0 // indirect cloud.google.com/go/compute v1.23.3 // indirect cloud.google.com/go/compute/metadata v0.2.3 // indirect cloud.google.com/go/iam v1.1.5 // indirect @@ -113,24 +122,24 @@ require ( github.com/apache/thrift v0.16.0 // indirect github.com/aws/aws-sdk-go v1.34.0 // indirect github.com/aws/aws-sdk-go-v2/aws/protocol/eventstream v1.5.1 // indirect - github.com/aws/aws-sdk-go-v2/feature/ec2/imds v1.14.10 // indirect - github.com/aws/aws-sdk-go-v2/internal/configsources v1.2.9 // indirect - github.com/aws/aws-sdk-go-v2/internal/endpoints/v2 v2.5.9 // indirect + github.com/aws/aws-sdk-go-v2/feature/ec2/imds v1.14.11 // indirect + github.com/aws/aws-sdk-go-v2/internal/configsources v1.2.10 // indirect + github.com/aws/aws-sdk-go-v2/internal/endpoints/v2 v2.5.10 // indirect github.com/aws/aws-sdk-go-v2/internal/ini v1.7.2 // indirect github.com/aws/aws-sdk-go-v2/internal/v4a v1.2.3 // indirect github.com/aws/aws-sdk-go-v2/service/internal/accept-encoding v1.10.4 // indirect github.com/aws/aws-sdk-go-v2/service/internal/checksum v1.2.3 // indirect - github.com/aws/aws-sdk-go-v2/service/internal/presigned-url v1.10.9 // indirect + github.com/aws/aws-sdk-go-v2/service/internal/presigned-url v1.10.10 // indirect github.com/aws/aws-sdk-go-v2/service/internal/s3shared v1.16.3 // indirect - github.com/aws/aws-sdk-go-v2/service/sso v1.18.5 // indirect - github.com/aws/aws-sdk-go-v2/service/ssooidc v1.21.5 // indirect - github.com/aws/aws-sdk-go-v2/service/sts v1.26.6 // indirect + github.com/aws/aws-sdk-go-v2/service/sso v1.18.7 // indirect + github.com/aws/aws-sdk-go-v2/service/ssooidc v1.21.7 // indirect + github.com/aws/aws-sdk-go-v2/service/sts v1.26.7 // indirect github.com/cenkalti/backoff/v4 v4.2.1 // indirect github.com/census-instrumentation/opencensus-proto v0.4.1 // indirect github.com/cespare/xxhash/v2 v2.2.0 // indirect github.com/cncf/udpa/go v0.0.0-20220112060539-c52dc94e7fbe // indirect - github.com/cncf/xds/go v0.0.0-20230607035331-e9ce68804cb4 // indirect - github.com/containerd/containerd v1.7.7 // indirect + github.com/cncf/xds/go v0.0.0-20231109132714-523115ebc101 // indirect + github.com/containerd/containerd v1.7.11 // indirect github.com/cpuguy83/dockercfg v0.3.1 // indirect github.com/docker/distribution v2.8.2+incompatible // indirect github.com/docker/docker v24.0.7+incompatible // but required to resolve issue docker has with go1.20 @@ -153,9 +162,8 @@ require ( github.com/inconshreveable/mousetrap v1.1.0 // indirect github.com/jmespath/go-jmespath v0.4.0 // indirect github.com/klauspost/asmfmt v1.3.2 // indirect - github.com/klauspost/compress v1.17.4 // indirect + github.com/klauspost/compress v1.17.5 // indirect github.com/klauspost/cpuid/v2 v2.2.6 // indirect - github.com/kr/text v0.2.0 // indirect github.com/magiconair/properties v1.8.7 // indirect github.com/minio/asm2plan9s 
v0.0.0-20200509001527-cdd76441f9d8 // indirect github.com/minio/c2goasm v0.0.0-20190812172519-36a3d3bbc4f3 // indirect @@ -166,7 +174,7 @@ require ( github.com/morikuni/aec v1.0.0 // indirect github.com/opencontainers/go-digest v1.0.0 // indirect github.com/opencontainers/image-spec v1.1.0-rc5 // indirect - github.com/opencontainers/runc v1.1.5 // indirect + github.com/opencontainers/runc v1.1.12 // indirect github.com/pierrec/lz4/v4 v4.1.15 // indirect github.com/pkg/errors v0.9.1 // indirect github.com/pkg/xattr v0.4.9 // indirect @@ -180,11 +188,11 @@ require ( github.com/youmark/pkcs8 v0.0.0-20181117223130-1be2e3e5546d // indirect github.com/zeebo/xxh3 v1.0.2 // indirect go.opencensus.io v0.24.0 // indirect - golang.org/x/crypto v0.17.0 // indirect + golang.org/x/crypto v0.18.0 // indirect golang.org/x/mod v0.11.0 // indirect golang.org/x/tools v0.10.0 // indirect - golang.org/x/xerrors v0.0.0-20220907171357-04be3eba64a2 // indirect + golang.org/x/xerrors v0.0.0-20231012003039-104605ab7028 // indirect google.golang.org/appengine v1.6.8 // indirect - google.golang.org/genproto/googleapis/api v0.0.0-20231120223509-83a465c0220f // indirect - google.golang.org/genproto/googleapis/rpc v0.0.0-20231127180814-3a041ad873d4 // indirect + google.golang.org/genproto/googleapis/api v0.0.0-20240122161410-6c6643bf1457 // indirect + google.golang.org/genproto/googleapis/rpc v0.0.0-20240122161410-6c6643bf1457 // indirect ) diff --git a/sdks/go.sum b/sdks/go.sum index 7dc312f9cba2..acb53d867165 100644 --- a/sdks/go.sum +++ b/sdks/go.sum @@ -8,8 +8,8 @@ cloud.google.com/go v0.46.3/go.mod h1:a6bKKbmY7er1mI7TEI4lsAkts/mkhTSZK8w33B4RAg cloud.google.com/go v0.50.0/go.mod h1:r9sluTvynVuxRIOHXQEHMFffphuXHOMZMycpNR5e6To= cloud.google.com/go v0.52.0/go.mod h1:pXajvRH/6o3+F9jDHZWQ5PbGhn+o8w9qiu/CffaVdO4= cloud.google.com/go v0.53.0/go.mod h1:fp/UouUEsRkN6ryDKNW/Upv/JBKnv6WDthjR6+vze6M= -cloud.google.com/go v0.110.10 h1:LXy9GEO+timppncPIAZoOj3l58LIU9k+kn48AN7IO3Y= -cloud.google.com/go v0.110.10/go.mod h1:v1OoFqYxiBkUrruItNM3eT4lLByNjxmJSV/xDKJNnic= +cloud.google.com/go v0.112.0 h1:tpFCD7hpHFlQ8yPwT3x+QeXqc2T6+n6T+hmABHfDUSM= +cloud.google.com/go v0.112.0/go.mod h1:3jEEVwZ/MHU4djK5t5RHuKOA/GbLddgTdVubX1qnPD4= cloud.google.com/go/bigquery v1.0.1/go.mod h1:i/xbL2UlR5RvWAURpBYZTtm/cXjCha9lbfbpx4poX+o= cloud.google.com/go/bigquery v1.3.0/go.mod h1:PjpwJnslEMmckchkHFfq+HTD2DmtT67aNFKH1/VBDHE= cloud.google.com/go/bigquery v1.4.0/go.mod h1:S8dzgnTigyfTmLBfrtrhyYhwRxG72rYxvftPBK2Dvzc= @@ -38,8 +38,8 @@ cloud.google.com/go/pubsub v1.1.0/go.mod h1:EwwdRX2sKPjnvnqCa270oGRyludottCI76h+ cloud.google.com/go/pubsub v1.2.0/go.mod h1:jhfEVHT8odbXTkndysNHCcx0awwzvfOlguIAii9o8iA= cloud.google.com/go/pubsub v1.33.0 h1:6SPCPvWav64tj0sVX/+npCBKhUi/UjJehy9op/V3p2g= cloud.google.com/go/pubsub v1.33.0/go.mod h1:f+w71I33OMyxf9VpMVcZbnG5KSUkCOUHYpFd5U1GdRc= -cloud.google.com/go/spanner v1.53.1 h1:xNmE0SXMSxNBuk7lRZ5G/S+A49X91zkSTt7Jn5Ptlvw= -cloud.google.com/go/spanner v1.53.1/go.mod h1:liG4iCeLqm5L3fFLU5whFITqP0e0orsAW1uUSrd4rws= +cloud.google.com/go/spanner v1.56.0 h1:o/Cv7/zZ1WgRXVCd5g3Nc23ZI39p/1pWFqFwvg6Wcu8= +cloud.google.com/go/spanner v1.56.0/go.mod h1:DndqtUKQAt3VLuV2Le+9Y3WTnq5cNKrnLb/Piqcj+h0= cloud.google.com/go/storage v1.0.0/go.mod h1:IhtSnM/ZTZV8YYJWCY8RULGVqBDmpoyjwiyrjsg+URw= cloud.google.com/go/storage v1.5.0/go.mod h1:tpKbwo567HUNpVclU5sGELwQWBDZ8gh0ZeosJ0Rtdos= cloud.google.com/go/storage v1.6.0/go.mod h1:N7U0C8pVQ/+NIKOBQyamJIeKQKkZ+mxpohlUTyfDhBk= @@ -64,8 +64,8 @@ github.com/BurntSushi/xgb 
v0.0.0-20160522181843-27f122750802/go.mod h1:IVnqGOEym github.com/JohnCGriffin/overflow v0.0.0-20211019200055-46fa312c352c h1:RGWPOewvKIROun94nF7v2cua9qP+thov/7M50KEoeSU= github.com/Microsoft/go-winio v0.6.1 h1:9/kr64B9VUZrLm5YYwbGtUJnMgqWVOdUAXu6Migciow= github.com/Microsoft/go-winio v0.6.1/go.mod h1:LRdKpFKfdobln8UmuiYcKPot9D2v6svN5+sAH+4kjUM= -github.com/Microsoft/hcsshim v0.11.1 h1:hJ3s7GbWlGK4YVV92sO88BQSyF4ZLVy7/awqOlPxFbA= -github.com/Microsoft/hcsshim v0.11.1/go.mod h1:nFJmaO4Zr5Y7eADdFOpYswDDlNVbvcIJJNJLECr5JQg= +github.com/Microsoft/hcsshim v0.11.4 h1:68vKo2VN8DE9AdN4tnkWnmdhqdbpUFM8OF3Airm7fz8= +github.com/Microsoft/hcsshim v0.11.4/go.mod h1:smjE4dvqPX9Zldna+t5FG3rnoHhaB7QYxPRqGcpAD9w= github.com/andybalholm/brotli v1.0.4 h1:V7DdXeJtZscaqfNuAdSRuRFzuiKlHSC/Zh3zl9qY3JY= github.com/andybalholm/brotli v1.0.4/go.mod h1:fO7iG3H7G2nSZ7m0zPUDn85XEX2GTukHGRSepvi9Eig= github.com/apache/arrow/go/arrow v0.0.0-20200730104253-651201b0f516 h1:byKBBF2CKWBjjA4J1ZL2JXttJULvWSl50LegTyRZ728= @@ -81,26 +81,26 @@ github.com/aws/aws-sdk-go v1.30.19/go.mod h1:5zCpMtNQVjRREroY7sYe8lOMRSxkhG6MZve github.com/aws/aws-sdk-go v1.34.0 h1:brux2dRrlwCF5JhTL7MUT3WUwo9zfDHZZp3+g3Mvlmo= github.com/aws/aws-sdk-go v1.34.0/go.mod h1:5zCpMtNQVjRREroY7sYe8lOMRSxkhG6MZveU8YkpAk0= github.com/aws/aws-sdk-go-v2 v1.7.1/go.mod h1:L5LuPC1ZgDr2xQS7AmIec/Jlc7O/Y1u2KxJyNVab250= -github.com/aws/aws-sdk-go-v2 v1.24.0 h1:890+mqQ+hTpNuw0gGP6/4akolQkSToDJgHfQE7AwGuk= -github.com/aws/aws-sdk-go-v2 v1.24.0/go.mod h1:LNh45Br1YAkEKaAqvmE1m8FUx6a5b/V0oAKV7of29b4= +github.com/aws/aws-sdk-go-v2 v1.24.1 h1:xAojnj+ktS95YZlDf0zxWBkbFtymPeDP+rvUQIH3uAU= +github.com/aws/aws-sdk-go-v2 v1.24.1/go.mod h1:LNh45Br1YAkEKaAqvmE1m8FUx6a5b/V0oAKV7of29b4= github.com/aws/aws-sdk-go-v2/aws/protocol/eventstream v1.5.1 h1:ZY3108YtBNq96jNZTICHxN1gSBSbnvIdYwwqnvCV4Mc= github.com/aws/aws-sdk-go-v2/aws/protocol/eventstream v1.5.1/go.mod h1:t8PYl/6LzdAqsU4/9tz28V/kU+asFePvpOMkdul0gEQ= github.com/aws/aws-sdk-go-v2/config v1.5.0/go.mod h1:RWlPOAW3E3tbtNAqTwvSW54Of/yP3oiZXMI0xfUdjyA= github.com/aws/aws-sdk-go-v2/config v1.26.2 h1:+RWLEIWQIGgrz2pBPAUoGgNGs1TOyF4Hml7hCnYj2jc= github.com/aws/aws-sdk-go-v2/config v1.26.2/go.mod h1:l6xqvUxt0Oj7PI/SUXYLNyZ9T/yBPn3YTQcJLLOdtR8= github.com/aws/aws-sdk-go-v2/credentials v1.3.1/go.mod h1:r0n73xwsIVagq8RsxmZbGSRQFj9As3je72C2WzUIToc= -github.com/aws/aws-sdk-go-v2/credentials v1.16.13 h1:WLABQ4Cp4vXtXfOWOS3MEZKr6AAYUpMczLhgKtAjQ/8= -github.com/aws/aws-sdk-go-v2/credentials v1.16.13/go.mod h1:Qg6x82FXwW0sJHzYruxGiuApNo31UEtJvXVSZAXeWiw= +github.com/aws/aws-sdk-go-v2/credentials v1.16.16 h1:8q6Rliyv0aUFAVtzaldUEcS+T5gbadPbWdV1WcAddK8= +github.com/aws/aws-sdk-go-v2/credentials v1.16.16/go.mod h1:UHVZrdUsv63hPXFo1H7c5fEneoVo9UXiz36QG1GEPi0= github.com/aws/aws-sdk-go-v2/feature/ec2/imds v1.3.0/go.mod h1:2LAuqPx1I6jNfaGDucWfA2zqQCYCOMCDHiCOciALyNw= -github.com/aws/aws-sdk-go-v2/feature/ec2/imds v1.14.10 h1:w98BT5w+ao1/r5sUuiH6JkVzjowOKeOJRHERyy1vh58= -github.com/aws/aws-sdk-go-v2/feature/ec2/imds v1.14.10/go.mod h1:K2WGI7vUvkIv1HoNbfBA1bvIZ+9kL3YVmWxeKuLQsiw= +github.com/aws/aws-sdk-go-v2/feature/ec2/imds v1.14.11 h1:c5I5iH+DZcH3xOIMlz3/tCKJDaHFwYEmxvlh2fAcFo8= +github.com/aws/aws-sdk-go-v2/feature/ec2/imds v1.14.11/go.mod h1:cRrYDYAMUohBJUtUnOhydaMHtiK/1NZ0Otc9lIb6O0Y= github.com/aws/aws-sdk-go-v2/feature/s3/manager v1.3.2/go.mod h1:qaqQiHSrOUVOfKe6fhgQ6UzhxjwqVW8aHNegd6Ws4w4= github.com/aws/aws-sdk-go-v2/feature/s3/manager v1.13.8 h1:wuOjvalpd2CnXffks74Vq6n3yv9vunKCoy4R1sjStGk= github.com/aws/aws-sdk-go-v2/feature/s3/manager 
v1.13.8/go.mod h1:vywwjy6VnrR48Izg136JoSUXC4mH9QeUi3g0EH9DSrA= -github.com/aws/aws-sdk-go-v2/internal/configsources v1.2.9 h1:v+HbZaCGmOwnTTVS86Fleq0vPzOd7tnJGbFhP0stNLs= -github.com/aws/aws-sdk-go-v2/internal/configsources v1.2.9/go.mod h1:Xjqy+Nyj7VDLBtCMkQYOw1QYfAEZCVLrfI0ezve8wd4= -github.com/aws/aws-sdk-go-v2/internal/endpoints/v2 v2.5.9 h1:N94sVhRACtXyVcjXxrwK1SKFIJrA9pOJ5yu2eSHnmls= -github.com/aws/aws-sdk-go-v2/internal/endpoints/v2 v2.5.9/go.mod h1:hqamLz7g1/4EJP+GH5NBhcUMLjW+gKLQabgyz6/7WAU= +github.com/aws/aws-sdk-go-v2/internal/configsources v1.2.10 h1:vF+Zgd9s+H4vOXd5BMaPWykta2a6Ih0AKLq/X6NYKn4= +github.com/aws/aws-sdk-go-v2/internal/configsources v1.2.10/go.mod h1:6BkRjejp/GR4411UGqkX8+wFMbFbqsUIimfK4XjOKR4= +github.com/aws/aws-sdk-go-v2/internal/endpoints/v2 v2.5.10 h1:nYPe006ktcqUji8S2mqXf9c/7NdiKriOwMvWQHgYztw= +github.com/aws/aws-sdk-go-v2/internal/endpoints/v2 v2.5.10/go.mod h1:6UV4SZkVvmODfXKql4LCbaZUpF7HO2BX38FgBf9ZOLw= github.com/aws/aws-sdk-go-v2/internal/ini v1.1.1/go.mod h1:Zy8smImhTdOETZqfyn01iNOe0CNggVbPjCajyaz6Gvg= github.com/aws/aws-sdk-go-v2/internal/ini v1.7.2 h1:GrSw8s0Gs/5zZ0SX+gX4zQjRnRsMJDJ2sLur1gRBhEM= github.com/aws/aws-sdk-go-v2/internal/ini v1.7.2/go.mod h1:6fQQgfuGmw8Al/3M2IgIllycxV7ZW7WCdVSqfBeUiCY= @@ -112,8 +112,8 @@ github.com/aws/aws-sdk-go-v2/service/internal/accept-encoding v1.10.4/go.mod h1: github.com/aws/aws-sdk-go-v2/service/internal/checksum v1.2.3 h1:xbwRyCy7kXrOj89iIKLB6NfE2WCpP9HoKyk8dMDvnIQ= github.com/aws/aws-sdk-go-v2/service/internal/checksum v1.2.3/go.mod h1:R+/S1O4TYpcktbVwddeOYg+uwUfLhADP2S/x4QwsCTM= github.com/aws/aws-sdk-go-v2/service/internal/presigned-url v1.2.1/go.mod h1:zceowr5Z1Nh2WVP8bf/3ikB41IZW59E4yIYbg+pC6mw= -github.com/aws/aws-sdk-go-v2/service/internal/presigned-url v1.10.9 h1:Nf2sHxjMJR8CSImIVCONRi4g0Su3J+TSTbS7G0pUeMU= -github.com/aws/aws-sdk-go-v2/service/internal/presigned-url v1.10.9/go.mod h1:idky4TER38YIjr2cADF1/ugFMKvZV7p//pVeV5LZbF0= +github.com/aws/aws-sdk-go-v2/service/internal/presigned-url v1.10.10 h1:DBYTXwIGQSGs9w4jKm60F5dmCQ3EEruxdc0MFh+3EY4= +github.com/aws/aws-sdk-go-v2/service/internal/presigned-url v1.10.10/go.mod h1:wohMUQiFdzo0NtxbBg0mSRGZ4vL3n0dKjLTINdcIino= github.com/aws/aws-sdk-go-v2/service/internal/s3shared v1.5.1/go.mod h1:6EQZIwNNvHpq/2/QSJnp4+ECvqIy55w95Ofs0ze+nGQ= github.com/aws/aws-sdk-go-v2/service/internal/s3shared v1.16.3 h1:KV0z2RDc7euMtg8aUT1czv5p29zcLlXALNFsd3jkkEc= github.com/aws/aws-sdk-go-v2/service/internal/s3shared v1.16.3/go.mod h1:KZgs2ny8HsxRIRbDwgvJcHHBZPOzQr/+NtGwnP+w2ec= @@ -121,13 +121,13 @@ github.com/aws/aws-sdk-go-v2/service/s3 v1.11.1/go.mod h1:XLAGFrEjbvMCLvAtWLLP32 github.com/aws/aws-sdk-go-v2/service/s3 v1.42.2 h1:NnduxUd9+Fq9DcCDdJK8v6l9lR1xDX4usvog+JuQAno= github.com/aws/aws-sdk-go-v2/service/s3 v1.42.2/go.mod h1:NXRKkiRF+erX2hnybnVU660cYT5/KChRD4iUgJ97cI8= github.com/aws/aws-sdk-go-v2/service/sso v1.3.1/go.mod h1:J3A3RGUvuCZjvSuZEcOpHDnzZP/sKbhDWV2T1EOzFIM= -github.com/aws/aws-sdk-go-v2/service/sso v1.18.5 h1:ldSFWz9tEHAwHNmjx2Cvy1MjP5/L9kNoR0skc6wyOOM= -github.com/aws/aws-sdk-go-v2/service/sso v1.18.5/go.mod h1:CaFfXLYL376jgbP7VKC96uFcU8Rlavak0UlAwk1Dlhc= -github.com/aws/aws-sdk-go-v2/service/ssooidc v1.21.5 h1:2k9KmFawS63euAkY4/ixVNsYYwrwnd5fIvgEKkfZFNM= -github.com/aws/aws-sdk-go-v2/service/ssooidc v1.21.5/go.mod h1:W+nd4wWDVkSUIox9bacmkBP5NMFQeTJ/xqNabpzSR38= +github.com/aws/aws-sdk-go-v2/service/sso v1.18.7 h1:eajuO3nykDPdYicLlP3AGgOyVN3MOlFmZv7WGTuJPow= +github.com/aws/aws-sdk-go-v2/service/sso v1.18.7/go.mod 
h1:+mJNDdF+qiUlNKNC3fxn74WWNN+sOiGOEImje+3ScPM= +github.com/aws/aws-sdk-go-v2/service/ssooidc v1.21.7 h1:QPMJf+Jw8E1l7zqhZmMlFw6w1NmfkfiSK8mS4zOx3BA= +github.com/aws/aws-sdk-go-v2/service/ssooidc v1.21.7/go.mod h1:ykf3COxYI0UJmxcfcxcVuz7b6uADi1FkiUz6Eb7AgM8= github.com/aws/aws-sdk-go-v2/service/sts v1.6.0/go.mod h1:q7o0j7d7HrJk/vr9uUt3BVRASvcU7gYZB9PUgPiByXg= -github.com/aws/aws-sdk-go-v2/service/sts v1.26.6 h1:HJeiuZ2fldpd0WqngyMR6KW7ofkXNLyOaHwEIGm39Cs= -github.com/aws/aws-sdk-go-v2/service/sts v1.26.6/go.mod h1:XX5gh4CB7wAs4KhcF46G6C8a2i7eupU19dcAAE+EydU= +github.com/aws/aws-sdk-go-v2/service/sts v1.26.7 h1:NzO4Vrau795RkUdSHKEwiR01FaGzGOH1EETJ+5QHnm0= +github.com/aws/aws-sdk-go-v2/service/sts v1.26.7/go.mod h1:6h2YuIoxaMSCFf5fi1EgZAwdfkGMgDY+DVfa61uLe4U= github.com/aws/smithy-go v1.6.0/go.mod h1:SObp3lf9smib00L/v3U2eAKG8FyQ7iLrJnQiAmR5n+E= github.com/aws/smithy-go v1.19.0 h1:KWFKQV80DpP3vJrrA9sVAHQ5gc2z8i4EzrLhLlWXcBM= github.com/aws/smithy-go v1.19.0/go.mod h1:NukqUGpCZIILqqiV0NIjeFh24kd/FAa4beRb6nbIUPE= @@ -138,32 +138,26 @@ github.com/census-instrumentation/opencensus-proto v0.4.1 h1:iKLQ0xPNFxR/2hzXZMr github.com/census-instrumentation/opencensus-proto v0.4.1/go.mod h1:4T9NM4+4Vw91VeyqjLS6ao50K5bOcLKN6Q42XnYaRYw= github.com/cespare/xxhash/v2 v2.2.0 h1:DC2CZ1Ep5Y4k3ZQ899DldepgrayRUGE6BBZ/cd9Cj44= github.com/cespare/xxhash/v2 v2.2.0/go.mod h1:VGX0DQ3Q6kWi7AoAeZDth3/j3BFtOZR5XLFGgcrjCOs= -github.com/checkpoint-restore/go-criu/v5 v5.3.0/go.mod h1:E/eQpaFtUKGOOSEBZgmKAcn+zUUwWxqcaKZlF54wK8E= github.com/chzyer/logex v1.1.10/go.mod h1:+Ywpsq7O8HXn0nuIou7OrIPyXbp3wmkHB+jjWRnGsAI= github.com/chzyer/readline v0.0.0-20180603132655-2972be24d48e/go.mod h1:nSuG5e5PlCu98SY8svDHJxuZscDgtXS6KTTbou5AhLI= github.com/chzyer/test v0.0.0-20180213035817-a1ea475d72b1/go.mod h1:Q3SI9o4m/ZMnBNeIyt5eFwwo7qiLfzFZmjNmxjkiQlU= -github.com/cilium/ebpf v0.7.0/go.mod h1:/oI2+1shJiTGAMgl6/RgJr36Eo1jzrRcAWbcXO2usCA= github.com/client9/misspell v0.3.4/go.mod h1:qj6jICC3Q7zFZvVWo7KLAzC3yx5G7kyvSDkc90ppPyw= github.com/cncf/udpa/go v0.0.0-20191209042840-269d4d468f6f/go.mod h1:M8M6+tZqaGXZJjfX53e64911xZQV5JYwmTeXPW+k8Sc= github.com/cncf/udpa/go v0.0.0-20220112060539-c52dc94e7fbe h1:QQ3GSy+MqSHxm/d8nCtnAiZdYFd45cYZPs8vOOIYKfk= github.com/cncf/udpa/go v0.0.0-20220112060539-c52dc94e7fbe/go.mod h1:6pvJx4me5XPnfI9Z40ddWsdw2W/uZgQLFXToKeRcDiI= github.com/cncf/xds/go v0.0.0-20210922020428-25de7278fc84/go.mod h1:eXthEFrGJvWHgFFCl3hGmgk+/aYT6PnTQLykKQRLhEs= -github.com/cncf/xds/go v0.0.0-20230607035331-e9ce68804cb4 h1:/inchEIKaYC1Akx+H+gqO04wryn5h75LSazbRlnya1k= -github.com/cncf/xds/go v0.0.0-20230607035331-e9ce68804cb4/go.mod h1:eXthEFrGJvWHgFFCl3hGmgk+/aYT6PnTQLykKQRLhEs= +github.com/cncf/xds/go v0.0.0-20231109132714-523115ebc101 h1:7To3pQ+pZo0i3dsWEbinPNFs5gPSBOsJtx3wTT94VBY= +github.com/cncf/xds/go v0.0.0-20231109132714-523115ebc101/go.mod h1:eXthEFrGJvWHgFFCl3hGmgk+/aYT6PnTQLykKQRLhEs= github.com/colinmarc/hdfs/v2 v2.1.1/go.mod h1:M3x+k8UKKmxtFu++uAZ0OtDU8jR3jnaZIAc6yK4Ue0c= -github.com/containerd/console v1.0.3/go.mod h1:7LqA/THxQ86k76b8c/EMSiaJ3h1eZkMkXar0TQ1gf3U= -github.com/containerd/containerd v1.7.7 h1:QOC2K4A42RQpcrZyptP6z9EJZnlHfHJUfZrAAHe15q4= -github.com/containerd/containerd v1.7.7/go.mod h1:3c4XZv6VeT9qgf9GMTxNTMFxGJrGpI2vz1yk4ye+YY8= +github.com/containerd/containerd v1.7.11 h1:lfGKw3eU35sjV0aG2eYZTiwFEY1pCzxdzicHP3SZILw= +github.com/containerd/containerd v1.7.11/go.mod h1:5UluHxHTX2rdvYuZ5OJTC5m/KJNs0Zs9wVoJm9zf5ZE= github.com/containerd/log v0.1.0 h1:TCJt7ioM2cr/tfR8GPbGf9/VRAX8D2B4PjzCpfX540I= 
github.com/containerd/log v0.1.0/go.mod h1:VRRf09a7mHDIRezVKTRCrOq78v577GXq3bSa3EhrzVo= -github.com/coreos/go-systemd/v22 v22.3.2/go.mod h1:Y58oyj3AT4RCenI/lSvhwexgC+NSVTIJ3seZv2GcEnc= github.com/cpuguy83/dockercfg v0.3.1 h1:/FpZ+JaygUR/lZP2NlFI2DVfrOEMAIKP5wWEJdoYe9E= github.com/cpuguy83/dockercfg v0.3.1/go.mod h1:sugsbF4//dDlL/i+S+rtpIWp+5h0BHJHfjj5/jFyUJc= -github.com/cpuguy83/go-md2man/v2 v2.0.0-20190314233015-f79a8a8ca69d/go.mod h1:maD7wRr/U5Z6m/iR4s+kqSMx2CaBsrgA7czyZG/E6dU= github.com/cpuguy83/go-md2man/v2 v2.0.3/go.mod h1:tgQtvFlXSQOSOSIRvRPT7W67SCa46tRHOmNcaadrF8o= github.com/creack/pty v1.1.9/go.mod h1:oKZEueFk5CKHvIhNR5MUki03XCEU+Q6VDXinZuGJ33E= github.com/creack/pty v1.1.18 h1:n56/Zwd5o6whRC5PMGretI4IdRLlmBXYNjScPaBgsbY= -github.com/cyphar/filepath-securejoin v0.2.3/go.mod h1:aPGpWjXOXUn2NCNjFvBE6aRxGGx79pTxQpKOJNYHHl4= github.com/davecgh/go-spew v1.1.0/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38= github.com/davecgh/go-spew v1.1.1 h1:vj9j/u1bqnvCEfJOwUhtlOARqs3+rkHYY13jYWTU97c= github.com/davecgh/go-spew v1.1.1/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38= @@ -173,7 +167,6 @@ github.com/docker/docker v24.0.7+incompatible h1:Wo6l37AuwP3JaMnZa226lzVXGA3F9Ig github.com/docker/docker v24.0.7+incompatible/go.mod h1:eEKB0N0r5NX/I1kEveEz05bcu8tLC/8azJZsviup8Sk= github.com/docker/go-connections v0.4.0 h1:El9xVISelRB7BuFusrZozjnkIM5YnzCViNKohAFqRJQ= github.com/docker/go-connections v0.4.0/go.mod h1:Gbd7IOopHjR8Iph03tsViu4nIes5XhDvyHbTtUxmeec= -github.com/docker/go-units v0.4.0/go.mod h1:fgPhTUdO+D/Jk86RDLlptpiXQzgHJF7gydDDbaIK4Dk= github.com/docker/go-units v0.5.0 h1:69rxXcBk27SvSaaxTtLh/8llcHD8vYHT7WSdRZ/jvr4= github.com/docker/go-units v0.5.0/go.mod h1:fgPhTUdO+D/Jk86RDLlptpiXQzgHJF7gydDDbaIK4Dk= github.com/dustin/go-humanize v1.0.1 h1:GzkhY7T5VNhEkwH0PVJgjz+fX1rhBrR7pRT3mDkpeCY= @@ -190,8 +183,8 @@ github.com/felixge/httpsnoop v1.0.4 h1:NFTV2Zj1bL4mc9sqWACXbQFVBBg2W3GPvqp8/ESS2 github.com/felixge/httpsnoop v1.0.4/go.mod h1:m8KPJKqk1gH5J9DgRY2ASl2lWCfGKXixSwevea8zH2U= github.com/form3tech-oss/jwt-go v3.2.2+incompatible/go.mod h1:pbq4aXjuKjdthFRnoDwaVPLA+WlJuPGy+QneDUgJi2k= github.com/frankban/quicktest v1.2.2/go.mod h1:Qh/WofXFeiAFII1aEBu529AtJo6Zg2VHscnEsbBnJ20= -github.com/frankban/quicktest v1.11.3 h1:8sXhOn0uLys67V8EsXLc6eszDs8VXWxL3iRvebPhedY= -github.com/frankban/quicktest v1.11.3/go.mod h1:wRf/ReqHper53s+kmmSZizM8NamnL3IM0I9ntUbOk+k= +github.com/frankban/quicktest v1.14.0 h1:+cqqvzZV87b4adx/5ayVOaYZ2CrvM4ejQvUdBzPPUss= +github.com/frankban/quicktest v1.14.0/go.mod h1:NeW+ay9A/U67EYXNFA1nPE8e/tnQv/09mUdL/ijj8og= github.com/fsouza/fake-gcs-server v1.47.7 h1:56/U4rKY081TaNbq0gHWi7/71UxC2KROqcnrD9BRJhs= github.com/fsouza/fake-gcs-server v1.47.7/go.mod h1:4vPUynN8/zZlxk5Jpy6LvvTTxItdTAObK4DYnp89Jys= github.com/go-gl/glfw v0.0.0-20190409004039-e6da0acd62b1/go.mod h1:vR7hzQXu2zJy9AVAgeJqvqgH9Q5CA+iKCZ2gyEVpxRU= @@ -210,8 +203,6 @@ github.com/go-sql-driver/mysql v1.7.1 h1:lUIinVbN1DY0xBg0eMOzmmtGoHwWBbvnWubQUrt github.com/go-sql-driver/mysql v1.7.1/go.mod h1:OXbVy3sEdcQ2Doequ6Z5BW6fXNQTmx+9S1MCJN5yJMI= github.com/goccy/go-json v0.9.11 h1:/pAaQDLHEoCq/5FFmSKBswWmK6H0e8g4159Kc/X/nqk= github.com/goccy/go-json v0.9.11/go.mod h1:6MelG93GURQebXPDq3khkgXZkazVtN9CRI+MGFi0w8I= -github.com/godbus/dbus/v5 v5.0.4/go.mod h1:xhWf0FNVPg57R7Z0UbKHbJfkEywrmjJnf7w5xrFpKfA= -github.com/godbus/dbus/v5 v5.0.6/go.mod h1:xhWf0FNVPg57R7Z0UbKHbJfkEywrmjJnf7w5xrFpKfA= github.com/gogo/protobuf v1.3.2 h1:Ov1cvc58UF3b5XjBnZv7+opcTcQFZebYjWzi34vdm4Q= github.com/gogo/protobuf v1.3.2/go.mod 
h1:P1XiOD3dCwIKUDQYPy72D8LYyHL2YPYrpS2s69NZV8Q= github.com/golang/glog v0.0.0-20160126235308-23def4e6c14b/go.mod h1:SBH7ygxi8pfUlaOkMMuAQtPIUF8ecWP5IEl/CR7VP2Q= @@ -268,6 +259,7 @@ github.com/google/go-cmp v0.5.6/go.mod h1:v8dTdLbMG2kIc/vJvl+f65V22dbkXbowE6jgT/ github.com/google/go-cmp v0.5.9/go.mod h1:17dUlkBOakJ0+DkrSSNjCkIjxS6bF9zb3elmeNGIjoY= github.com/google/go-cmp v0.6.0 h1:ofyhxvXcZhMsU5ulbFiLKl/XBFqE1GSq7atu8tAmTRI= github.com/google/go-cmp v0.6.0/go.mod h1:17dUlkBOakJ0+DkrSSNjCkIjxS6bF9zb3elmeNGIjoY= +github.com/google/gofuzz v1.0.0/go.mod h1:dBl0BpW6vV/+mYPU4Po3pmUjxk6FQPldtuIdl/M65Eg= github.com/google/martian v2.1.0+incompatible h1:/CP5g8u/VJHijgedC/Legn3BAbAaWPgecwXBIDzw5no= github.com/google/martian v2.1.0+incompatible/go.mod h1:9I4somxYTbIHy5NJKHRl3wXiIaQGbYVAs8BPL6v8lEs= github.com/google/martian/v3 v3.3.2 h1:IqNFLAmvJOgVlpdEBiQbDc2EwKW77amAycfTuWKdfvw= @@ -317,6 +309,7 @@ github.com/jmespath/go-jmespath/internal/testify v1.5.1/go.mod h1:L3OGu8Wl2/fWfC github.com/johannesboyne/gofakes3 v0.0.0-20221110173912-32fb85c5aed6 h1:eQGUsj2LcsLzfrHY1noKDSU7h+c9/rw9pQPwbQ9g1jQ= github.com/johannesboyne/gofakes3 v0.0.0-20221110173912-32fb85c5aed6/go.mod h1:LIAXxPvcUXwOcTIj9LSNSUpE9/eMHalTWxsP/kmWxQI= github.com/json-iterator/go v1.1.12 h1:PV8peI4a0ysnczrg+LtxykD8LfKY9ML6u2jnxaEnrnM= +github.com/json-iterator/go v1.1.12/go.mod h1:e30LSqwooZae/UwlEbR2852Gd8hjQvJoHmT4TnhNGBo= github.com/jstemmer/go-junit-report v0.0.0-20190106144839-af01ea7f8024/go.mod h1:6v2b51hI/fHJwM22ozAgKL4VKDeJcHhJFhtBdhmNjmU= github.com/jstemmer/go-junit-report v0.9.1/go.mod h1:Brl9GWCQeLvo8nXZwPNNblvFj/XSXhF0NWZEnDohbsk= github.com/kisielk/errcheck v1.5.0/go.mod h1:pFxgyoBC7bSaBwPgfKdkLd5X25qrDl4LWUI2bnpBCr8= @@ -326,13 +319,14 @@ github.com/klauspost/asmfmt v1.3.2/go.mod h1:AG8TuvYojzulgDAMCnYn50l/5QV3Bs/tp6j github.com/klauspost/compress v1.9.7/go.mod h1:RyIbtBH6LamlWaDj8nUwkbUhJ87Yi3uG0guNDohfE1A= github.com/klauspost/compress v1.13.1/go.mod h1:8dP1Hq4DHOhN9w426knH3Rhby4rFm6D8eO+e+Dq5Gzg= github.com/klauspost/compress v1.13.6/go.mod h1:/3/Vjq9QcHkK5uEr5lBEmyoZ1iFhe47etQ6QUkpK6sk= -github.com/klauspost/compress v1.17.4 h1:Ej5ixsIri7BrIjBkRZLTo6ghwrEtHFk7ijlczPW4fZ4= -github.com/klauspost/compress v1.17.4/go.mod h1:/dCuZOvVtNoHsyb+cuJD3itjs3NbnF6KH9zAO4BDxPM= +github.com/klauspost/compress v1.17.5 h1:d4vBd+7CHydUqpFBgUEKkSdtSugf9YFmSkvUYPquI5E= +github.com/klauspost/compress v1.17.5/go.mod h1:/dCuZOvVtNoHsyb+cuJD3itjs3NbnF6KH9zAO4BDxPM= github.com/klauspost/cpuid/v2 v2.2.6 h1:ndNyv040zDGIDh8thGkXYjnFtiN02M1PVVF+JE/48xc= github.com/klauspost/cpuid/v2 v2.2.6/go.mod h1:Lcz8mBdAVJIBVzewtcLocK12l3Y+JytZYpaMropDUws= github.com/kr/pretty v0.1.0/go.mod h1:dAy3ld7l9f0ibDNOQOHHMYYIIbhfbHSm3C4ZsoJORNo= github.com/kr/pretty v0.2.1/go.mod h1:ipq/a2n7PKx3OHsz4KJII5eveXtPO4qwEXGdVfWzfnI= github.com/kr/pretty v0.3.0 h1:WgNl7dwNpEZ6jJ9k1snq4pZsg7DOEN8hP9Xw0Tsjwk0= +github.com/kr/pretty v0.3.0/go.mod h1:640gp4NfQd8pI5XOwp5fnNeVWj67G7CFk/SaSQn7NBk= github.com/kr/pty v1.1.1/go.mod h1:pFQYn66WHrOpPYNljwOMqo10TkYh1fy3cYio2l3bCsQ= github.com/kr/text v0.1.0/go.mod h1:4Jbv+DJW3UT/LiOwJeYQe1efqtUx/iVham/4vfdArNI= github.com/kr/text v0.2.0 h1:5Nx0Ya0ZqY2ygV366QzturHI13Jq95ApcVaJBhpS+AY= @@ -357,26 +351,27 @@ github.com/minio/minio-go/v7 v7.0.66 h1:bnTOXOHjOqv/gcMuiVbN9o2ngRItvqE774dG9nq0 github.com/minio/sha256-simd v1.0.1 h1:6kaan5IFmwTNynnKKpDHe6FWHohJOHhCPchzK49dzMM= github.com/moby/patternmatcher v0.6.0 h1:GmP9lR19aU5GqSSFko+5pRqHi+Ohk1O69aFiKkVGiPk= github.com/moby/patternmatcher v0.6.0/go.mod 
h1:hDPoyOpDY7OrrMDLaYoY3hf52gNCR/YOUYxkhApJIxc= -github.com/moby/sys/mountinfo v0.5.0/go.mod h1:3bMD3Rg+zkqx8MRYPi7Pyb0Ie97QEBmdxbhnCLlSvSU= github.com/moby/sys/sequential v0.5.0 h1:OPvI35Lzn9K04PBbCLW0g4LcFAJgHsvXsRyewg5lXtc= github.com/moby/sys/sequential v0.5.0/go.mod h1:tH2cOOs5V9MlPiXcQzRC+eEyab644PWKGRYaaV5ZZlo= github.com/moby/term v0.5.0 h1:xt8Q1nalod/v7BqbG21f8mQPqH+xAaC9C3N3wfWbVP0= github.com/moby/term v0.5.0/go.mod h1:8FzsFHVUBGZdbDsJw/ot+X+d5HLUbvklYLJ9uGfcI3Y= +github.com/modern-go/concurrent v0.0.0-20180228061459-e0a39a4cb421/go.mod h1:6dJC0mAP4ikYIbvyc7fijjWJddQyLn8Ig3JB5CqoB9Q= github.com/modern-go/concurrent v0.0.0-20180306012644-bacd9c7ef1dd h1:TRLaZ9cD/w8PVh93nsPXa1VrQ6jlwL5oN8l14QlcNfg= +github.com/modern-go/concurrent v0.0.0-20180306012644-bacd9c7ef1dd/go.mod h1:6dJC0mAP4ikYIbvyc7fijjWJddQyLn8Ig3JB5CqoB9Q= github.com/modern-go/reflect2 v1.0.2 h1:xBagoLtFs94CBntxluKeaWgTMpvLxC4ur3nMaC9Gz0M= +github.com/modern-go/reflect2 v1.0.2/go.mod h1:yWuevngMOJpCy52FWWMvUC8ws7m/LJsjYzDa0/r8luk= github.com/montanaflynn/stats v0.0.0-20171201202039-1bf9dbcd8cbe h1:iruDEfMl2E6fbMZ9s0scYfZQ84/6SPL6zC8ACM2oIL0= github.com/montanaflynn/stats v0.0.0-20171201202039-1bf9dbcd8cbe/go.mod h1:wL8QJuTMNUDYhXwkmfOly8iTdp5TEcJFWZD2D7SIkUc= github.com/morikuni/aec v1.0.0 h1:nP9CBfwrvYnBRgY6qfDQkygYDmYwOilePFkwzv4dU8A= github.com/morikuni/aec v1.0.0/go.mod h1:BbKIizmSmc5MMPqRYbxO4ZU0S0+P200+tUnFx7PXmsc= -github.com/mrunalp/fileutils v0.5.0/go.mod h1:M1WthSahJixYnrXQl/DFQuteStB1weuxD2QJNHXfbSQ= github.com/nats-io/jwt/v2 v2.5.3 h1:/9SWvzc6hTfamcgXJ3uYRpgj+QuY2aLNqRiqrKcrpEo= github.com/nats-io/jwt/v2 v2.5.3/go.mod h1:iysuPemFcc7p4IoYots3IuELSI4EDe9Y0bQMe+I3Bf4= -github.com/nats-io/nats-server/v2 v2.10.7 h1:f5VDy+GMu7JyuFA0Fef+6TfulfCs5nBTgq7MMkFJx5Y= -github.com/nats-io/nats-server/v2 v2.10.7/go.mod h1:V2JHOvPiPdtfDXTuEUsthUnCvSDeFrK4Xn9hRo6du7c= -github.com/nats-io/nats.go v1.31.0 h1:/WFBHEc/dOKBF6qf1TZhrdEfTmOZ5JzdJ+Y3m6Y/p7E= -github.com/nats-io/nats.go v1.31.0/go.mod h1:di3Bm5MLsoB4Bx61CBTsxuarI36WbhAwOm8QrW39+i8= -github.com/nats-io/nkeys v0.4.6 h1:IzVe95ru2CT6ta874rt9saQRkWfe2nFj1NtvYSLqMzY= -github.com/nats-io/nkeys v0.4.6/go.mod h1:4DxZNzenSVd1cYQoAa8948QY3QDjrHfcfVADymtkpts= +github.com/nats-io/nats-server/v2 v2.10.10 h1:g1Wd64J5SGsoqWSx1qoNu9/At7a2x+jE7Qtf2XpEx/I= +github.com/nats-io/nats-server/v2 v2.10.10/go.mod h1:/TE61Dos8NlwZnjzyE3ZlOnM6dgl7tf937dnf4VclrA= +github.com/nats-io/nats.go v1.32.0 h1:Bx9BZS+aXYlxW08k8Gd3yR2s73pV5XSoAQUyp1Kwvp0= +github.com/nats-io/nats.go v1.32.0/go.mod h1:Ubdu4Nh9exXdSz0RVWRFBbRfrbSxOYd26oF0wkWclB8= +github.com/nats-io/nkeys v0.4.7 h1:RwNJbbIdYCoClSDNY7QVKZlyb/wfT6ugvFCiKy6vDvI= +github.com/nats-io/nkeys v0.4.7/go.mod h1:kqXRgRDPlGy7nGaEDMuYzmiJCIAAWDK0IMBtDmGD0nc= github.com/nats-io/nuid v1.0.1 h1:5iA8DT8V7q8WK2EScv2padNa/rTESc1KdnPw4TC2paw= github.com/nats-io/nuid v1.0.1/go.mod h1:19wcPz3Ph3q0Jbyiqsd0kePYG7A95tJPxeL+1OSON2c= github.com/ncw/swift v1.0.52/go.mod h1:23YIA4yWVnGwv2dQlN4bB7egfYX6YLn0Yo/S6zZO/ZM= @@ -384,10 +379,8 @@ github.com/opencontainers/go-digest v1.0.0 h1:apOUWs51W5PlhuyGyz9FCeeBIOUDA/6nW8 github.com/opencontainers/go-digest v1.0.0/go.mod h1:0JzlMkj0TRzQZfJkVvzbP0HBR3IKzErnv2BNG4W4MAM= github.com/opencontainers/image-spec v1.1.0-rc5 h1:Ygwkfw9bpDvs+c9E34SdgGOj41dX/cbdlwvlWt0pnFI= github.com/opencontainers/image-spec v1.1.0-rc5/go.mod h1:X4pATf0uXsnn3g5aiGIsVnJBR4mxhKzfwmvK/B2NTm8= -github.com/opencontainers/runc v1.1.5 h1:L44KXEpKmfWDcS02aeGm8QNTFXTo2D+8MYGDIJ/GDEs= -github.com/opencontainers/runc v1.1.5/go.mod 
h1:1J5XiS+vdZ3wCyZybsuxXZWGrgSr8fFJHLXuG2PsnNg= -github.com/opencontainers/runtime-spec v1.0.3-0.20210326190908-1c3f411f0417/go.mod h1:jwyrGlmzljRJv/Fgzds9SsS/C5hL+LL3ko9hs6T5lQ0= -github.com/opencontainers/selinux v1.10.0/go.mod h1:2i0OySw99QjzBBQByd1Gr9gSjvuho1lHsJxIJ3gGbJI= +github.com/opencontainers/runc v1.1.12 h1:BOIssBaW1La0/qbNZHXOOa71dZfZEQOzW7dqQf3phss= +github.com/opencontainers/runc v1.1.12/go.mod h1:S+lQwSfncpBha7XTy/5lBwWgm5+y5Ma/O44Ekby9FK8= github.com/pborman/getopt v0.0.0-20180729010549-6fdd0a2c7117/go.mod h1:85jBQOZwpVEaDAr341tbn15RS4fCAsIst0qp7i8ex1o= github.com/pierrec/lz4/v4 v4.1.8/go.mod h1:gZWDp/Ze/IJXGXf23ltt2EXimqmTUXEy0GFuRQyBid4= github.com/pierrec/lz4/v4 v4.1.15 h1:MO0/ucJhngq7299dKLwIMtgTfbkoSPF6AoMYDd8Q4q0= @@ -406,13 +399,12 @@ github.com/proullon/ramsql v0.1.3/go.mod h1:CFGqeQHQpdRfWqYmWD3yXqPTEaHkF4zgXy1C github.com/rogpeppe/clock v0.0.0-20190514195947-2896927a307a h1:3QH7VyOaaiUHNrA9Se4YQIRkDTCw1EJls9xTUCaCeRM= github.com/rogpeppe/clock v0.0.0-20190514195947-2896927a307a/go.mod h1:4r5QyqhjIWCcK8DO4KMclc5Iknq5qVBAlbYYzAbUScQ= github.com/rogpeppe/go-internal v1.3.0/go.mod h1:M8bDsm7K2OlrFYOpmOWEs/qY81heoFRclV5y23lUDJ4= +github.com/rogpeppe/go-internal v1.6.1/go.mod h1:xXDCJY+GAPziupqXw64V24skbSoqbTEfhy4qGm1nDQc= github.com/rogpeppe/go-internal v1.9.0 h1:73kH8U+JUqXU8lRuOHeVHaa/SZPifC7BkcraZVejAe8= github.com/rs/xid v1.5.0 h1:mKX4bl4iPYJtEIxp6CYiUuLQ/8DYMoz0PUdtGgMFRVc= -github.com/russross/blackfriday/v2 v2.0.1/go.mod h1:+Rmxgy9KzJVeS9/2gXHxylqXiyQDYRxCVz55jmeOWTM= github.com/russross/blackfriday/v2 v2.1.0/go.mod h1:+Rmxgy9KzJVeS9/2gXHxylqXiyQDYRxCVz55jmeOWTM= github.com/ryszard/goskiplist v0.0.0-20150312221310-2dfbae5fcf46 h1:GHRpF1pTW19a8tTFrMLUcfWwyC0pnifVo2ClaLq+hP8= github.com/ryszard/goskiplist v0.0.0-20150312221310-2dfbae5fcf46/go.mod h1:uAQ5PCi+MFsC7HjREoAz1BU+Mq60+05gifQSsHSDG/8= -github.com/seccomp/libseccomp-golang v0.9.2-0.20220502022130-f33da4d89646/go.mod h1:JA8cRccbGaA1s33RQf7Y1+q9gHmZX1yB/z9WDN1C6fg= github.com/shabbyrobe/gocovmerge v0.0.0-20180507124511-f6ea450bfb63 h1:J6qvD6rbmOil46orKqJaRPG+zTpoGlBTUdyv8ki63L0= github.com/shabbyrobe/gocovmerge v0.0.0-20180507124511-f6ea450bfb63/go.mod h1:n+VKSARF5y/tS9XFSP7vWDfS+GUC5vs/YT7M5XDTUEM= github.com/shirou/gopsutil/v3 v3.23.9 h1:ZI5bWVeu2ep4/DIxB4U9okeYJ7zp/QLTO4auRb/ty/E= @@ -421,8 +413,6 @@ github.com/shoenig/go-m1cpu v0.1.6 h1:nxdKQNcEB6vzgA2E2bvzKIYRuNj7XNJ4S/aRSwKzFt github.com/shoenig/go-m1cpu v0.1.6/go.mod h1:1JJMcUBvfNwpq05QDQVAnx3gUHr9IYF7GNg9SUEw2VQ= github.com/shoenig/test v0.6.4 h1:kVTaSd7WLz5WZ2IaoM0RSzRsUD+m8wRR+5qvntpn4LU= github.com/shoenig/test v0.6.4/go.mod h1:byHiCGXqrVaflBLAMq/srcZIHynQPQgeyvkvXnjqq0k= -github.com/shurcooL/sanitized_anchor_name v1.0.0/go.mod h1:1NzhyTcUVG4SuEtjjoZeVRXNmyL/1OwPU0+IJeTBvfc= -github.com/sirupsen/logrus v1.8.1/go.mod h1:yWOB1SBYBC5VeMP7gHvWumXLIWorT60ONWic61uBYv0= github.com/sirupsen/logrus v1.9.3 h1:dueUQJ1C2q9oE3F7wvmSGAaVtTmUizReu6fjN8uqzbQ= github.com/sirupsen/logrus v1.9.3/go.mod h1:naHLuLoDiP4jHNo9R0sCBMtWGeIprob74mVsIT4qYEQ= github.com/spf13/afero v1.2.1/go.mod h1:9ZxEEn6pIJ8Rxe320qSDBk6AsU0r9pR7Q4OcevTdifk= @@ -433,6 +423,7 @@ github.com/spf13/pflag v1.0.5 h1:iy+VFUOCP1a+8yFto/drg2CJ5u0yRoB7fZw3DKv/JXA= github.com/spf13/pflag v1.0.5/go.mod h1:McXfInJRrz4CZXVZOBLb0bTZqETkiAhM9Iw0y3An2Bg= github.com/stretchr/objx v0.1.0/go.mod h1:HFkY916IF+rwdDfMAkV7OtwuqBVzrE8GR6GFx+wExME= github.com/stretchr/objx v0.4.0/go.mod h1:YvHI0jy2hoMjB+UWwv71VJQ9isScKT/TqJzVSSt89Yw= +github.com/stretchr/objx v0.5.0 h1:1zr/of2m5FGMsad5YfcqgdqdWrIhu+EBEJRhR1U7z/c= 
github.com/stretchr/objx v0.5.0/go.mod h1:Yh+to48EsGEfYuaHDzXPcE3xhTkx73EhmCGUpEOglKo= github.com/stretchr/testify v1.2.0/go.mod h1:a8OnRcib4nhh0OaRAV+Yts87kKdq0PP7pXfy6kDkUVs= github.com/stretchr/testify v1.2.2/go.mod h1:a8OnRcib4nhh0OaRAV+Yts87kKdq0PP7pXfy6kDkUVs= @@ -446,18 +437,14 @@ github.com/stretchr/testify v1.8.0/go.mod h1:yNjHg4UonilssWZ8iaSj1OCr/vHnekPRkoO github.com/stretchr/testify v1.8.1/go.mod h1:w2LPCIKwWwSfY2zedu0+kehJoqGctiVI29o6fzry7u4= github.com/stretchr/testify v1.8.4 h1:CcVxjf3Q8PM0mHUKJCdn+eZZtm5yQwehR5yeSVQQcUk= github.com/stretchr/testify v1.8.4/go.mod h1:sz/lmYIOXD/1dqDmKjjqLyZ2RngseejIcXlSw2iwfAo= -github.com/syndtr/gocapability v0.0.0-20200815063812-42c35b437635/go.mod h1:hkRG7XYTFWNJGYcbNJQlaLq0fg1yr4J4t/NcTQtrfww= github.com/testcontainers/testcontainers-go v0.26.0 h1:uqcYdoOHBy1ca7gKODfBd9uTHVK3a7UL848z09MVZ0c= github.com/testcontainers/testcontainers-go v0.26.0/go.mod h1:ICriE9bLX5CLxL9OFQ2N+2N+f+803LNJ1utJb1+Inx0= -github.com/tetratelabs/wazero v1.5.0 h1:Yz3fZHivfDiZFUXnWMPUoiW7s8tC1sjdBtlJn08qYa0= -github.com/tetratelabs/wazero v1.5.0/go.mod h1:0U0G41+ochRKoPKCJlh0jMg1CHkyfK8kDqiirMmKY8A= +github.com/tetratelabs/wazero v1.6.0 h1:z0H1iikCdP8t+q341xqepY4EWvHEw8Es7tlqiVzlP3g= +github.com/tetratelabs/wazero v1.6.0/go.mod h1:0U0G41+ochRKoPKCJlh0jMg1CHkyfK8kDqiirMmKY8A= github.com/tklauser/go-sysconf v0.3.12 h1:0QaGUFOdQaIVdPgfITYzaTegZvdCjmYO52cSFAEVmqU= github.com/tklauser/go-sysconf v0.3.12/go.mod h1:Ho14jnntGE1fpdOqQEEaiKRpvIavV0hSfmBq8nJbHYI= github.com/tklauser/numcpus v0.6.1 h1:ng9scYS7az0Bk4OZLvrNXNSAO2Pxr1XXRAPyjhIx+Fk= github.com/tklauser/numcpus v0.6.1/go.mod h1:1XfjsgE2zo8GVw7POkMbHENHzVg3GzmoZ9fESEdAacY= -github.com/urfave/cli v1.22.1/go.mod h1:Gos4lmkARVdJ6EkW0WaNv/tZAAMe9V7XWyB60NtXRu0= -github.com/vishvananda/netlink v1.1.0/go.mod h1:cTgwzPIzzgDAYoQrMm0EdrjRUBkTqKYppBueQtXaqoE= -github.com/vishvananda/netns v0.0.0-20191106174202-0a2b9b5464df/go.mod h1:JP3t17pCcGlemwknint6hfoeCVQrEMVwxRLRjXpq+BU= github.com/xdg-go/pbkdf2 v1.0.0 h1:Su7DPu48wXMwC3bs7MCNG+z4FhcyEuz5dlvchbq0B0c= github.com/xdg-go/pbkdf2 v1.0.0/go.mod h1:jrpuAogTd400dnrH08LKmI/xc1MbPOebTwRqcT5RDeI= github.com/xdg-go/scram v1.1.2 h1:FHX5I5B4i4hKRVRBCFRxq1iQRej7WO3hhBuJf+UUySY= @@ -498,6 +485,8 @@ go.opentelemetry.io/otel v1.21.0 h1:hzLeKBZEL7Okw2mGzZ0cc4k/A7Fta0uoPgaJCr8fsFc= go.opentelemetry.io/otel v1.21.0/go.mod h1:QZzNPQPm1zLX4gZK4cMi+71eaorMSGT3A4znnUvNNEo= go.opentelemetry.io/otel/metric v1.21.0 h1:tlYWfeo+Bocx5kLEloTjbcDwBuELRrIFxwdQ36PlJu4= go.opentelemetry.io/otel/metric v1.21.0/go.mod h1:o1p3CA8nNHW8j5yuQLdc1eeqEaPfzug24uvsyIEJRWM= +go.opentelemetry.io/otel/sdk v1.21.0 h1:FTt8qirL1EysG6sTQRZ5TokkU8d0ugCj8htOgThZXQ8= +go.opentelemetry.io/otel/sdk v1.21.0/go.mod h1:Nna6Yv7PWTdgJHVRD9hIYywQBRx7pbox6nwBnZIxl/E= go.opentelemetry.io/otel/trace v1.21.0 h1:WD9i5gzvoUPuXIXH24ZNBudiarZDKuekPqi/E8fpfLc= go.opentelemetry.io/otel/trace v1.21.0/go.mod h1:LGbsEB0f9LGjN+OZaQQ26sohbOmiMR+BaslueVtS/qQ= golang.org/x/crypto v0.0.0-20180723164146-c126467f60eb/go.mod h1:6SG95UA2DQfeDnfUPMdvaQW0Q7yPrPDi9nlGo2tz2b4= @@ -510,8 +499,8 @@ golang.org/x/crypto v0.0.0-20201002170205-7f63de1d35b0/go.mod h1:LzIPMQfyMNhhGPh golang.org/x/crypto v0.0.0-20210513164829-c07d793c2f9a/go.mod h1:P+XmwS30IXTQdn5tA2iutPOUgjI07+tq3H3K9MVA1s8= golang.org/x/crypto v0.0.0-20210921155107-089bfa567519/go.mod h1:GvvjBRRGRdwPK5ydBHafDWAxML/pGHZbMvKqRZ5+Abc= golang.org/x/crypto v0.0.0-20220622213112-05595931fe9d/go.mod h1:IxCIyHEi3zRg3s0A5j5BB6A9Jmi73HwBIUl50j+osU4= -golang.org/x/crypto v0.17.0 
h1:r8bRNjWL3GshPW3gkd+RpvzWrZAwPS49OmTGZ/uhM4k= -golang.org/x/crypto v0.17.0/go.mod h1:gCAAfMLgwOJRpTjQ2zCCt2OcSfYMTeZVSRtQlPC7Nq4= +golang.org/x/crypto v0.18.0 h1:PGVlW0xEltQnzFZ55hkuX5+KLyrMYhHld1YHO4AKcdc= +golang.org/x/crypto v0.18.0/go.mod h1:R0j02AL6hcrfOiy9T4ZYp/rcWeMxM3L6QYxlOuEG1mg= golang.org/x/exp v0.0.0-20190121172915-509febef88a4/go.mod h1:CJ0aWSM057203Lf6IL+f9T1iT9GByDxfZKAQTCR3kQA= golang.org/x/exp v0.0.0-20190306152737-a1d7652674e8/go.mod h1:CJ0aWSM057203Lf6IL+f9T1iT9GByDxfZKAQTCR3kQA= golang.org/x/exp v0.0.0-20190510132918-efd6b22b2522/go.mod h1:ZjyILWgesfNpC6sMxTJOJm9Kp84zZh5NQWvqDGG3Qr8= @@ -566,19 +555,18 @@ golang.org/x/net v0.0.0-20200222125558-5a598a2470a0/go.mod h1:z5CRVTTTmAJ677TzLL golang.org/x/net v0.0.0-20200226121028-0de0cce0169b/go.mod h1:z5CRVTTTmAJ677TzLLGU+0bjPO0LkuOLi4/5GtJWs/s= golang.org/x/net v0.0.0-20201021035429-f5854403a974/go.mod h1:sp8m0HH+o8qH0wwXwYZr8TS3Oi6o0r6Gce1SSxlDquU= golang.org/x/net v0.0.0-20201110031124-69a78807bb2b/go.mod h1:sp8m0HH+o8qH0wwXwYZr8TS3Oi6o0r6Gce1SSxlDquU= -golang.org/x/net v0.0.0-20201224014010-6772e930b67b/go.mod h1:m0MpNAwzfU5UDzcl9v0D8zg8gWTRqZa9RBIspLL5mdg= golang.org/x/net v0.0.0-20210226172049-e18ecbb05110/go.mod h1:m0MpNAwzfU5UDzcl9v0D8zg8gWTRqZa9RBIspLL5mdg= golang.org/x/net v0.0.0-20211112202133-69e39bad7dc2/go.mod h1:9nx3DQGgdP8bBQD5qxJ1jj9UTztislL4KSBs9R2vV5Y= golang.org/x/net v0.0.0-20220722155237-a158d28d115b/go.mod h1:XRhObCWvk6IyKnWLug+ECip1KBveYUHfp+8e9klMJ9c= -golang.org/x/net v0.19.0 h1:zTwKpTd2XuCqf8huc7Fo2iSy+4RHPd10s4KzeTnVr1c= -golang.org/x/net v0.19.0/go.mod h1:CfAk/cbD4CthTvqiEl8NpboMuiuOYsAr/7NOjZJtv1U= +golang.org/x/net v0.20.0 h1:aCL9BSgETF1k+blQaYUBx9hJ9LOGP3gAVemcZlf1Kpo= +golang.org/x/net v0.20.0/go.mod h1:z8BVo6PvndSri0LbOE3hAn0apkU+1YvI6E70E9jsnvY= golang.org/x/oauth2 v0.0.0-20180821212333-d2e6202438be/go.mod h1:N/0e6XlmueqKjAGxoOufVs8QHGRruUQn6yWY3a++T0U= golang.org/x/oauth2 v0.0.0-20190226205417-e64efc72b421/go.mod h1:gOpvHmFTYa4IltrdGE7lF6nIHvwfUNPOp7c8zoXwtLw= golang.org/x/oauth2 v0.0.0-20190604053449-0f29369cfe45/go.mod h1:gOpvHmFTYa4IltrdGE7lF6nIHvwfUNPOp7c8zoXwtLw= golang.org/x/oauth2 v0.0.0-20191202225959-858c2ad4c8b6/go.mod h1:gOpvHmFTYa4IltrdGE7lF6nIHvwfUNPOp7c8zoXwtLw= golang.org/x/oauth2 v0.0.0-20200107190931-bf48bf16ab8d/go.mod h1:gOpvHmFTYa4IltrdGE7lF6nIHvwfUNPOp7c8zoXwtLw= -golang.org/x/oauth2 v0.15.0 h1:s8pnnxNVzjWyrvYdFUQq5llS1PX2zhPXmccZv99h7uQ= -golang.org/x/oauth2 v0.15.0/go.mod h1:q48ptWNTY5XWf+JNten23lcvHpLJ0ZSxF5ttTHKVCAM= +golang.org/x/oauth2 v0.16.0 h1:aDkGMBSYxElaoP81NpoUoz2oo2R2wHdZpGToUxfyQrQ= +golang.org/x/oauth2 v0.16.0/go.mod h1:hqZ+0LWXsiVoZpeld6jVt06P3adbS2Uu911W1SsJv2o= golang.org/x/sync v0.0.0-20180314180146-1d60e4601c6f/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= golang.org/x/sync v0.0.0-20181108010431-42b317875d0f/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= golang.org/x/sync v0.0.0-20181221193216-37e7f081c4d4/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= @@ -587,8 +575,8 @@ golang.org/x/sync v0.0.0-20190423024810-112230192c58/go.mod h1:RxMgew5VJxzue5/jJ golang.org/x/sync v0.0.0-20190911185100-cd5d95a43a6e/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= golang.org/x/sync v0.0.0-20201020160332-67f06af15bc9/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= golang.org/x/sync v0.0.0-20220722155255-886fb9371eb4/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= -golang.org/x/sync v0.5.0 h1:60k92dhOjHxJkrqnwsfl8KuaHbn/5dl0lUPUklKo3qE= -golang.org/x/sync v0.5.0/go.mod h1:Czt+wKu1gCyEFDUtn0jG5QVvpJ6rzVqr5aXyt9drQfk= 
+golang.org/x/sync v0.6.0 h1:5BMeUDZ7vkXGfEr1x9B4bRcTH4lpkTkpdh0T/J+qjbQ= +golang.org/x/sync v0.6.0/go.mod h1:Czt+wKu1gCyEFDUtn0jG5QVvpJ6rzVqr5aXyt9drQfk= golang.org/x/sys v0.0.0-20180830151530-49385e6e1522/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY= golang.org/x/sys v0.0.0-20190130150945-aca44879d564/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY= golang.org/x/sys v0.0.0-20190215142949-d0b11bdaac8a/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY= @@ -597,14 +585,11 @@ golang.org/x/sys v0.0.0-20190412213103-97732733099d/go.mod h1:h1NjWce9XRLGQEsW7w golang.org/x/sys v0.0.0-20190502145724-3ef323f4f1fd/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= golang.org/x/sys v0.0.0-20190507160741-ecd444e8653b/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= golang.org/x/sys v0.0.0-20190606165138-5da285871e9c/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= -golang.org/x/sys v0.0.0-20190606203320-7fc4e5ec1444/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= golang.org/x/sys v0.0.0-20190624142023-c5567b49c5d0/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= golang.org/x/sys v0.0.0-20190726091711-fc99dfbffb4e/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= golang.org/x/sys v0.0.0-20190916202348-b4ddaad3f8a3/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= golang.org/x/sys v0.0.0-20191001151750-bb3f8db39f24/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= -golang.org/x/sys v0.0.0-20191026070338-33540a1f6037/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= golang.org/x/sys v0.0.0-20191112214154-59a1497f0cea/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= -golang.org/x/sys v0.0.0-20191115151921-52ab43148777/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= golang.org/x/sys v0.0.0-20191204072324-ce4227a45e2e/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= golang.org/x/sys v0.0.0-20191228213918-04cbcbbfeed8/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= golang.org/x/sys v0.0.0-20200113162924-86b910548bc1/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= @@ -616,13 +601,9 @@ golang.org/x/sys v0.0.0-20200828194041-157a740278f4/go.mod h1:h1NjWce9XRLGQEsW7w golang.org/x/sys v0.0.0-20200930185726-fdedc70b468f/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= golang.org/x/sys v0.0.0-20201119102817-f84b799fce68/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= golang.org/x/sys v0.0.0-20201204225414-ed752295db88/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= -golang.org/x/sys v0.0.0-20210124154548-22da62e12c0c/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= golang.org/x/sys v0.0.0-20210423082822-04245dca01da/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= golang.org/x/sys v0.0.0-20210615035016-665e8c7367d1/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= golang.org/x/sys v0.0.0-20210616094352-59db8d763f22/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= -golang.org/x/sys v0.0.0-20210906170528-6f6e22806c34/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= -golang.org/x/sys v0.0.0-20211025201205-69cdffdb9359/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= -golang.org/x/sys v0.0.0-20211116061358-0a5406a5449c/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= golang.org/x/sys v0.0.0-20220408201424-a24fb2fb8a0f/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= golang.org/x/sys v0.0.0-20220520151302-bc2c85ada10a/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= golang.org/x/sys v0.0.0-20220715151400-c0bba94af5f8/go.mod 
h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= @@ -631,8 +612,8 @@ golang.org/x/sys v0.5.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= golang.org/x/sys v0.8.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= golang.org/x/sys v0.11.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= golang.org/x/sys v0.12.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= -golang.org/x/sys v0.15.0 h1:h48lPFYpsTvQJZF4EKyI4aLHaev3CxivZmv7yZig9pc= -golang.org/x/sys v0.15.0/go.mod h1:/VUhepiaJMQUp4+oa/7Zr1D23ma6VTLIYjOOTFZPUcA= +golang.org/x/sys v0.16.0 h1:xWw16ngr6ZMtmxDyKyIgsE93KNKz5HKmMa3b8ALHidU= +golang.org/x/sys v0.16.0/go.mod h1:/VUhepiaJMQUp4+oa/7Zr1D23ma6VTLIYjOOTFZPUcA= golang.org/x/term v0.0.0-20201126162022-7de9c90e9dd1/go.mod h1:bj7SfCRtBDWHUb9snDiAeCFNEtKQo2Wmx5Cou7ajbmo= golang.org/x/term v0.0.0-20210927222741-03fcf44c2211/go.mod h1:jbD1KX2456YbFQfuXm/mYQcufACuNUgVhRMnK/tPxf8= golang.org/x/text v0.0.0-20170915032832-14c0d48ead0c/go.mod h1:NqM8EUOU14njkJ3fqMW+pc6Ldnwhi/IjpwHt7yyuwOQ= @@ -690,8 +671,8 @@ golang.org/x/xerrors v0.0.0-20190717185122-a985d3407aa7/go.mod h1:I/5z698sn9Ka8T golang.org/x/xerrors v0.0.0-20191011141410-1b5146add898/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0= golang.org/x/xerrors v0.0.0-20191204190536-9bdfabe68543/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0= golang.org/x/xerrors v0.0.0-20200804184101-5ec99f83aff1/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0= -golang.org/x/xerrors v0.0.0-20220907171357-04be3eba64a2 h1:H2TDz8ibqkAF6YGhCdN3jS9O0/s90v0rJh3X/OLHEUk= -golang.org/x/xerrors v0.0.0-20220907171357-04be3eba64a2/go.mod h1:K8+ghG5WaK9qNqU5K3HdILfMLy1f3aNYFI/wnl100a8= +golang.org/x/xerrors v0.0.0-20231012003039-104605ab7028 h1:+cNy6SZtPcJQH3LJVLOSmiC7MMxXNOb3PU/VUEz+EhU= +golang.org/x/xerrors v0.0.0-20231012003039-104605ab7028/go.mod h1:NDW/Ps6MPRej6fsCIbMTohpP40sJ/P/vI1MoTEGwX90= gonum.org/v1/gonum v0.11.0 h1:f1IJhK4Km5tBJmaiJXtk/PkL4cdVX6J+tGiM187uT5E= google.golang.org/api v0.4.0/go.mod h1:8k5glujaEP+g9n7WNsDg8QP6cUVNI86fCNMcbazEtwE= google.golang.org/api v0.7.0/go.mod h1:WtwebWUNSVBH/HAw79HIFXZNqEvBhG+Ra+ax0hx3E3M= @@ -702,8 +683,8 @@ google.golang.org/api v0.14.0/go.mod h1:iLdEw5Ide6rF15KTC1Kkl0iskquN2gFfn9o9XIsb google.golang.org/api v0.15.0/go.mod h1:iLdEw5Ide6rF15KTC1Kkl0iskquN2gFfn9o9XIsbkAI= google.golang.org/api v0.17.0/go.mod h1:BwFmGc8tA3vsd7r/7kR8DY7iEEGSU04BFxCo5jP/sfE= google.golang.org/api v0.18.0/go.mod h1:BwFmGc8tA3vsd7r/7kR8DY7iEEGSU04BFxCo5jP/sfE= -google.golang.org/api v0.154.0 h1:X7QkVKZBskztmpPKWQXgjJRPA2dJYrL6r+sYPRLj050= -google.golang.org/api v0.154.0/go.mod h1:qhSMkM85hgqiokIYsrRyKxrjfBeIhgl4Z2JmeRkYylc= +google.golang.org/api v0.157.0 h1:ORAeqmbrrozeyw5NjnMxh7peHO0UzV4wWYSwZeCUb20= +google.golang.org/api v0.157.0/go.mod h1:+z4v4ufbZ1WEpld6yMGHyggs+PmAHiaLNj5ytP3N01g= google.golang.org/appengine v1.1.0/go.mod h1:EbEs0AVv82hx2wNQdGPgUI5lhzA/G0D9YwlJXL52JkM= google.golang.org/appengine v1.4.0/go.mod h1:xpcJRLb0r/rnEns0DIKYYv+WjYCduHsrkT7/EB5XEv4= google.golang.org/appengine v1.5.0/go.mod h1:xpcJRLb0r/rnEns0DIKYYv+WjYCduHsrkT7/EB5XEv4= @@ -729,12 +710,12 @@ google.golang.org/genproto v0.0.0-20200204135345-fa8e72b47b90/go.mod h1:GmwEX6Z4 google.golang.org/genproto v0.0.0-20200212174721-66ed5ce911ce/go.mod h1:55QSHmfGQM9UVYDPBsyGGes0y52j32PQ3BqQfXhyH3c= google.golang.org/genproto v0.0.0-20200224152610-e50cd9704f63/go.mod h1:55QSHmfGQM9UVYDPBsyGGes0y52j32PQ3BqQfXhyH3c= google.golang.org/genproto v0.0.0-20200526211855-cb27e3aa2013/go.mod h1:NbSheEEYHJ7i3ixzK3sjbqSGDJWnxyFXZblF3eUsNvo= 
-google.golang.org/genproto v0.0.0-20231120223509-83a465c0220f h1:Vn+VyHU5guc9KjB5KrjI2q0wCOWEOIh0OEsleqakHJg= -google.golang.org/genproto v0.0.0-20231120223509-83a465c0220f/go.mod h1:nWSwAFPb+qfNJXsoeO3Io7zf4tMSfN8EA8RlDA04GhY= -google.golang.org/genproto/googleapis/api v0.0.0-20231120223509-83a465c0220f h1:2yNACc1O40tTnrsbk9Cv6oxiW8pxI/pXj0wRtdlYmgY= -google.golang.org/genproto/googleapis/api v0.0.0-20231120223509-83a465c0220f/go.mod h1:Uy9bTZJqmfrw2rIBxgGLnamc78euZULUBrLZ9XTITKI= -google.golang.org/genproto/googleapis/rpc v0.0.0-20231127180814-3a041ad873d4 h1:DC7wcm+i+P1rN3Ff07vL+OndGg5OhNddHyTA+ocPqYE= -google.golang.org/genproto/googleapis/rpc v0.0.0-20231127180814-3a041ad873d4/go.mod h1:eJVxU6o+4G1PSczBr85xmyvSNYAKvAYgkub40YGomFM= +google.golang.org/genproto v0.0.0-20240116215550-a9fa1716bcac h1:ZL/Teoy/ZGnzyrqK/Optxxp2pmVh+fmJ97slxSRyzUg= +google.golang.org/genproto v0.0.0-20240116215550-a9fa1716bcac/go.mod h1:+Rvu7ElI+aLzyDQhpHMFMMltsD6m7nqpuWDd2CwJw3k= +google.golang.org/genproto/googleapis/api v0.0.0-20240122161410-6c6643bf1457 h1:KHBtwE+eQc3+NxpjmRFlQ3pJQ2FNnhhgB9xOV8kyBuU= +google.golang.org/genproto/googleapis/api v0.0.0-20240122161410-6c6643bf1457/go.mod h1:4jWUdICTdgc3Ibxmr8nAJiiLHwQBY0UI0XZcEMaFKaA= +google.golang.org/genproto/googleapis/rpc v0.0.0-20240122161410-6c6643bf1457 h1:6Bi3wdn5Ed9baJn7P0gOhjwA98wOr6uSPjKagPHOVsE= +google.golang.org/genproto/googleapis/rpc v0.0.0-20240122161410-6c6643bf1457/go.mod h1:PAREbraiVEVGVdTZsVWjSbbTtSyGbAgIIvni8a8CD5s= google.golang.org/grpc v1.19.0/go.mod h1:mqu4LbDTu4XGKhr4mRzUsmM4RtVoemTSY81AxZiDr8c= google.golang.org/grpc v1.20.1/go.mod h1:10oTOabMzJvdu6/UiuZezV6QK5dSlG84ov/aaiqXj38= google.golang.org/grpc v1.21.1/go.mod h1:oYelfM1adQP15Ek0mdvEgi9Df8B9CZIaU1084ijfRaM= @@ -744,8 +725,8 @@ google.golang.org/grpc v1.26.0/go.mod h1:qbnxyOmOxrQa7FizSgH+ReBfzJrCY1pSN7KXBS8 google.golang.org/grpc v1.27.0/go.mod h1:qbnxyOmOxrQa7FizSgH+ReBfzJrCY1pSN7KXBS8abTk= google.golang.org/grpc v1.27.1/go.mod h1:qbnxyOmOxrQa7FizSgH+ReBfzJrCY1pSN7KXBS8abTk= google.golang.org/grpc v1.33.2/go.mod h1:JMHMWHQWaTccqQQlmk3MJZS+GWXOdAesneDmEnv2fbc= -google.golang.org/grpc v1.60.1 h1:26+wFr+cNqSGFcOXcabYC0lUVJVRa2Sb2ortSK7VrEU= -google.golang.org/grpc v1.60.1/go.mod h1:OlCHIeLYqSSsLi6i49B5QGdzaMZK9+M7LXN2FKz4eGM= +google.golang.org/grpc v1.61.0 h1:TOvOcuXn30kRao+gfcvsebNEa5iZIiLkisYEkf7R7o0= +google.golang.org/grpc v1.61.0/go.mod h1:VUbo7IFqmF1QtCAstipjG0GIoq49KvMe9+h1jFLBNJs= google.golang.org/protobuf v0.0.0-20200109180630-ec00e32a8dfd/go.mod h1:DFci5gLYBciE7Vtevhsrf46CRTquxDuWsQurQQe4oz8= google.golang.org/protobuf v0.0.0-20200221191635-4d8936d0db64/go.mod h1:kwYJMbMJ01Woi6D6+Kah6886xMZcty6N08ah7+eCXa0= google.golang.org/protobuf v0.0.0-20200228230310-ab0ca4ff8a60/go.mod h1:cfTl7dwQJ+fmap5saPgwCLgHXTUD7jkjRqWcaiX5VyM= @@ -757,9 +738,8 @@ google.golang.org/protobuf v1.23.1-0.20200526195155-81db48ad09cc/go.mod h1:EGpAD google.golang.org/protobuf v1.25.0/go.mod h1:9JNX74DMeImyA3h4bdi1ymwjUzf21/xIlbajtzgsN7c= google.golang.org/protobuf v1.26.0-rc.1/go.mod h1:jlhhOSvTdKEhbULTjvd4ARK9grFBp09yW+WbY/TyQbw= google.golang.org/protobuf v1.26.0/go.mod h1:9q0QmTI4eRPtz6boOQmLYwt+qCgq0jsYwAQnmE0givc= -google.golang.org/protobuf v1.27.1/go.mod h1:9q0QmTI4eRPtz6boOQmLYwt+qCgq0jsYwAQnmE0givc= -google.golang.org/protobuf v1.31.0 h1:g0LDEJHgrBl9N9r17Ru3sqWhkIx2NB67okBHPwC7hs8= -google.golang.org/protobuf v1.31.0/go.mod h1:HV8QOd/L58Z+nl8r43ehVNZIU/HEI6OcFqwMG9pJV4I= +google.golang.org/protobuf v1.32.0 h1:pPC6BG5ex8PDFnkbrGU3EixyhKcQ2aDuBS36lqK/C7I= +google.golang.org/protobuf 
v1.32.0/go.mod h1:c6P6GXX6sHbq/GpV6MGZEdwhWPcYBgnhAHhKbcUYpos= gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0= gopkg.in/check.v1 v1.0.0-20180628173108-788fd7840127/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0= gopkg.in/check.v1 v1.0.0-20201130134442-10cb98267c6c h1:Hei/4ADfdWqJk1ZMxUNpqntNwaWcugrBjAiHlqqRiVk= diff --git a/sdks/go/pkg/beam/runners/prism/internal/execute.go b/sdks/go/pkg/beam/runners/prism/internal/execute.go index b8bc68dcd1b7..1aa95bc6ee18 100644 --- a/sdks/go/pkg/beam/runners/prism/internal/execute.go +++ b/sdks/go/pkg/beam/runners/prism/internal/execute.go @@ -17,6 +17,7 @@ package internal import ( "context" + "errors" "fmt" "io" "sort" @@ -70,6 +71,13 @@ func RunPipeline(j *jobservices.Job) { j.Failed(err) return } + + if errors.Is(context.Cause(j.RootCtx), jobservices.ErrCancel) { + j.SendMsg("pipeline canceled " + j.String()) + j.Canceled() + return + } + j.SendMsg("pipeline completed " + j.String()) j.SendMsg("terminating " + j.String()) diff --git a/sdks/go/pkg/beam/runners/prism/internal/jobservices/job.go b/sdks/go/pkg/beam/runners/prism/internal/jobservices/job.go index bb5eb88c9193..6cde48ded9ac 100644 --- a/sdks/go/pkg/beam/runners/prism/internal/jobservices/job.go +++ b/sdks/go/pkg/beam/runners/prism/internal/jobservices/job.go @@ -177,6 +177,16 @@ func (j *Job) Done() { j.sendState(jobpb.JobState_DONE) } +// Canceling indicates that the job is canceling. +func (j *Job) Canceling() { + j.sendState(jobpb.JobState_CANCELLING) +} + +// Canceled indicates that the job is canceled. +func (j *Job) Canceled() { + j.sendState(jobpb.JobState_CANCELLED) +} + // Failed indicates that the job completed unsuccessfully. func (j *Job) Failed(err error) { slog.Error("job failed", slog.Any("job", j), slog.Any("error", err)) diff --git a/sdks/go/pkg/beam/runners/prism/internal/jobservices/management.go b/sdks/go/pkg/beam/runners/prism/internal/jobservices/management.go index 323d8c46efb1..0da37ef0bd73 100644 --- a/sdks/go/pkg/beam/runners/prism/internal/jobservices/management.go +++ b/sdks/go/pkg/beam/runners/prism/internal/jobservices/management.go @@ -17,6 +17,7 @@ package jobservices import ( "context" + "errors" "fmt" "sync" "sync/atomic" @@ -30,6 +31,10 @@ import ( "google.golang.org/protobuf/types/known/timestamppb" ) +var ( + ErrCancel = errors.New("pipeline canceled") +) + func (s *Server) nextId() string { v := atomic.AddUint32(&s.index, 1) return fmt.Sprintf("job-%03d", v) @@ -215,6 +220,31 @@ func (s *Server) Run(ctx context.Context, req *jobpb.RunJobRequest) (*jobpb.RunJ }, nil } +// Cancel a Job requested by the CancelJobRequest for jobs not in an already terminal state. +// Otherwise, returns nil if Job does not exist or the Job's existing state as part of the CancelJobResponse. +func (s *Server) Cancel(_ context.Context, req *jobpb.CancelJobRequest) (*jobpb.CancelJobResponse, error) { + s.mu.Lock() + job, ok := s.jobs[req.GetJobId()] + s.mu.Unlock() + if !ok { + return nil, nil + } + state := job.state.Load().(jobpb.JobState_Enum) + switch state { + case jobpb.JobState_CANCELLED, jobpb.JobState_DONE, jobpb.JobState_DRAINED, jobpb.JobState_UPDATED, jobpb.JobState_FAILED: + // Already at terminal state. 
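+		// Return the job's current state to the caller unchanged; there is nothing left to cancel.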
+ return &jobpb.CancelJobResponse{ + State: state, + }, nil + } + job.SendMsg("canceling " + job.String()) + job.Canceling() + job.CancelFn(ErrCancel) + return &jobpb.CancelJobResponse{ + State: jobpb.JobState_CANCELLING, + }, nil +} + // GetMessageStream subscribes to a stream of state changes and messages from the job. If throughput // is high, this may cause losses of messages. func (s *Server) GetMessageStream(req *jobpb.JobMessagesRequest, stream jobpb.JobService_GetMessageStreamServer) error { diff --git a/sdks/go/pkg/beam/runners/prism/internal/jobservices/management_test.go b/sdks/go/pkg/beam/runners/prism/internal/jobservices/management_test.go index 5813e6ef73e6..176abb8543a3 100644 --- a/sdks/go/pkg/beam/runners/prism/internal/jobservices/management_test.go +++ b/sdks/go/pkg/beam/runners/prism/internal/jobservices/management_test.go @@ -169,6 +169,40 @@ func TestServer(t *testing.T) { } }, }, + { + name: "Canceling", + noJobsCheck: func(ctx context.Context, t *testing.T, undertest *Server) { + resp, err := undertest.Cancel(ctx, &jobpb.CancelJobRequest{JobId: "job-001"}) + if resp != nil { + t.Errorf("Canceling(\"job-001\") = %s, want nil", resp) + } + if err != nil { + t.Errorf("Canceling(\"job-001\") = %v, want nil", err) + } + }, + postPrepCheck: func(ctx context.Context, t *testing.T, undertest *Server) { + resp, err := undertest.Cancel(ctx, &jobpb.CancelJobRequest{JobId: "job-001"}) + if err != nil { + t.Errorf("Canceling(\"job-001\") = %v, want nil", err) + } + if diff := cmp.Diff(&jobpb.CancelJobResponse{ + State: jobpb.JobState_CANCELLING, + }, resp, cmpOpts...); diff != "" { + t.Errorf("Canceling(\"job-001\") (-want, +got):\n%v", diff) + } + }, + postRunCheck: func(ctx context.Context, t *testing.T, undertest *Server, jobID string) { + resp, err := undertest.Cancel(ctx, &jobpb.CancelJobRequest{JobId: jobID}) + if err != nil { + t.Errorf("Canceling(\"%s\") = %v, want nil", jobID, err) + } + if diff := cmp.Diff(&jobpb.CancelJobResponse{ + State: jobpb.JobState_DONE, + }, resp, cmpOpts...); diff != "" { + t.Errorf("Canceling(\"%s\") (-want, +got):\n%v", jobID, diff) + } + }, + }, } for _, test := range tests { var called sync.WaitGroup diff --git a/sdks/go/pkg/beam/runners/prism/internal/jobservices/server_test.go b/sdks/go/pkg/beam/runners/prism/internal/jobservices/server_test.go index 2223f030ce1d..473c84f958e3 100644 --- a/sdks/go/pkg/beam/runners/prism/internal/jobservices/server_test.go +++ b/sdks/go/pkg/beam/runners/prism/internal/jobservices/server_test.go @@ -17,6 +17,7 @@ package jobservices import ( "context" + "errors" "sync" "testing" @@ -77,3 +78,63 @@ func TestServer_JobLifecycle(t *testing.T) { t.Log("success!") // Nothing to cleanup because we didn't start the server. } + +// Validates that invoking Cancel cancels a running job. 
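+// It walks the full lifecycle: Prepare and Run the job, issue Cancel and expect a
+// CANCELLING response, wait for the server's callback to observe ErrCancel and store
+// CANCELLED, then confirm the terminal state via GetState.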
+func TestServer_RunThenCancel(t *testing.T) { + var called sync.WaitGroup + called.Add(1) + undertest := NewServer(0, func(j *Job) { + if errors.Is(context.Cause(j.RootCtx), ErrCancel) { + j.state.Store(jobpb.JobState_CANCELLED) + called.Done() + } + }) + ctx := context.Background() + + wantPipeline := &pipepb.Pipeline{ + Requirements: []string{urns.RequirementSplittableDoFn}, + } + wantName := "testJob" + + resp, err := undertest.Prepare(ctx, &jobpb.PrepareJobRequest{ + Pipeline: wantPipeline, + JobName: wantName, + }) + if err != nil { + t.Fatalf("server.Prepare() = %v, want nil", err) + } + + if got := resp.GetPreparationId(); got == "" { + t.Fatalf("server.Prepare() = returned empty preparation ID, want non-empty: %v", prototext.Format(resp)) + } + + runResp, err := undertest.Run(ctx, &jobpb.RunJobRequest{ + PreparationId: resp.GetPreparationId(), + }) + if err != nil { + t.Fatalf("server.Run() = %v, want nil", err) + } + if got := runResp.GetJobId(); got == "" { + t.Fatalf("server.Run() = returned empty preparation ID, want non-empty") + } + + cancelResp, err := undertest.Cancel(ctx, &jobpb.CancelJobRequest{ + JobId: runResp.GetJobId(), + }) + if err != nil { + t.Fatalf("server.Canceling() = %v, want nil", err) + } + if cancelResp.State != jobpb.JobState_CANCELLING { + t.Fatalf("server.Canceling() = %v, want %v", cancelResp.State, jobpb.JobState_CANCELLING) + } + + called.Wait() + + stateResp, err := undertest.GetState(ctx, &jobpb.GetJobStateRequest{JobId: runResp.GetJobId()}) + if err != nil { + t.Fatalf("server.GetState() = %v, want nil", err) + } + if stateResp.State != jobpb.JobState_CANCELLED { + t.Fatalf("server.GetState() = %v, want %v", stateResp.State, jobpb.JobState_CANCELLED) + } +} diff --git a/sdks/java/core/build.gradle b/sdks/java/core/build.gradle index 64aae685c380..9de7e0da1eb3 100644 --- a/sdks/java/core/build.gradle +++ b/sdks/java/core/build.gradle @@ -75,6 +75,7 @@ dependencies { permitUnusedDeclared library.java.antlr // Required to load constants from the model, e.g. max timestamp for global window shadow project(path: ":model:pipeline", configuration: "shadow") + shadow project(path: ":model:fn-execution", configuration: "shadow") shadow project(path: ":model:job-management", configuration: "shadow") shadow library.java.vendored_grpc_1_60_1 shadow library.java.vendored_guava_32_1_2_jre @@ -119,6 +120,7 @@ dependencies { shadowTest library.java.log4j2_api shadowTest library.java.jamm testRuntimeOnly library.java.slf4j_jdk14 + testImplementation "com.github.stefanbirkner:system-rules:1.19.0" } project.tasks.compileTestJava { diff --git a/sdks/java/core/jmh/src/main/java/org/apache/beam/sdk/jmh/util/VarIntBenchmark.java b/sdks/java/core/jmh/src/main/java/org/apache/beam/sdk/jmh/util/VarIntBenchmark.java new file mode 100644 index 000000000000..964928d50558 --- /dev/null +++ b/sdks/java/core/jmh/src/main/java/org/apache/beam/sdk/jmh/util/VarIntBenchmark.java @@ -0,0 +1,303 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.beam.sdk.jmh.util; + +import java.io.IOException; +import java.io.OutputStream; +import java.util.Random; +import org.apache.beam.sdk.util.ByteStringOutputStream; +import org.openjdk.jmh.annotations.Benchmark; +import org.openjdk.jmh.annotations.Level; +import org.openjdk.jmh.annotations.OperationsPerInvocation; +import org.openjdk.jmh.annotations.Scope; +import org.openjdk.jmh.annotations.Setup; +import org.openjdk.jmh.annotations.State; +import org.openjdk.jmh.annotations.TearDown; +import org.openjdk.jmh.infra.Blackhole; + +/** Benchmarks for {@link org.apache.beam.sdk.util.VarInt} and variants. */ +@OperationsPerInvocation(VarIntBenchmark.VALUES_PER_INVOCATION) +public class VarIntBenchmark { + static final int VALUES_PER_INVOCATION = 2048; + private static final Random RNG = new Random(314159); + + /** Output to {@link Blackhole}. Do nothing, assume nothing. */ + @State(Scope.Benchmark) + public static class BlackholeOutput { + OutputStream stream; + + @Setup + public void setup(Blackhole bh) { + stream = + new OutputStream() { + @Override + public void write(int b) { + bh.consume(b); + } + + @Override + public void write(byte[] b) throws IOException { + bh.consume(b); + } + + @Override + public void write(byte[] b, int off, int len) throws IOException { + bh.consume(b); + } + }; + } + } + + /** Output to {@link ByteStringOutputStream}. */ + @State(Scope.Thread) + public static class ByteStringOutput { + final ByteStringOutputStream stream = new ByteStringOutputStream(); + + // Unfortunately, this needs to be cleaned up after use to avoid OOMs. + // It's not generally recommended to use Level.Invocation, but there's no way around it. + @TearDown(Level.Invocation) + public void tearDown(Blackhole bh) { + bh.consume(stream.toByteStringAndReset()); + } + } + + /** Input from randomly generated bytes. */ + @State(Scope.Benchmark) + public static class Bytes { + long[] values = new long[VALUES_PER_INVOCATION]; + + @Setup + public void setup() { + values = new long[VALUES_PER_INVOCATION]; + byte[] bytes = new byte[VALUES_PER_INVOCATION]; + RNG.nextBytes(bytes); + + for (int i = 0; i < VALUES_PER_INVOCATION; i++) { + values[i] = (long) (bytes[i] & 0x7F); + } + } + } + + /** Input from randomly generated longs. */ + @State(Scope.Benchmark) + public static class Longs { + long[] values = new long[VALUES_PER_INVOCATION]; + + @Setup + public void setup() { + values = new long[VALUES_PER_INVOCATION]; + + for (int i = 0; i < VALUES_PER_INVOCATION; i++) { + // This gaussian random is used to determine the encoded output size of the sample. + // The distribution value is tweaked to favor small integers, positive more so than + // negative. + double g = RNG.nextGaussian(); + double s = 3; + g = 10 * Math.min(Math.abs(g < 0 ? g + s : g / (s / 2)), s) / s; + + // Construct a bitmask to keep up to numBits of the input. + // Find the lowest bit to set in the 7 bit segment below numBits. 
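+        // Setting that bit pins the sample's highest set bit into [numBits, numBits + 6],
+        // so each value varint-encodes to exactly (numBits / 7) + 1 bytes.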
+ int numBits = 7 * (int) g; + long mask = ~(~0x7fL << numBits); + long low = 1L << numBits; + + values[i] = (RNG.nextLong() & mask) | low; + } + } + } + + // Used in Beam 2.52.0 + static void encodeDoLoop(long v, OutputStream stream) throws IOException { + do { + // Encode next 7 bits + terminator bit + long bits = v & 0x7F; + v >>>= 7; + byte b = (byte) (bits | ((v != 0) ? 0x80 : 0)); + stream.write(b); + } while (v != 0); + } + + // A tweak of the above, replacing a compare with a few bitwise operations. + static void encodeDoLoopTwiddle(long v, OutputStream stream) throws IOException { + do { + // Encode next 7 bits + terminator bit + long bits = v & 0x7F; + v >>>= 7; + long cont = (-v >> 63) & 0x80; + byte b = (byte) (bits | cont); + stream.write(b); + } while (v != 0); + } + + // Use a mask check to do less work for 1 byte output. + static void encodeLoop(long v, OutputStream stream) throws IOException { + while ((v & ~0x7FL) != 0) { + stream.write((byte) (v | 0x80)); + v >>>= 7; + } + stream.write((byte) v); + } + + // As above, but unrolled. + static void encodeUnrolled(long v, OutputStream stream) throws IOException { + if ((v & ~0x7F) == 0) { + stream.write((byte) v); + return; + } + stream.write((byte) (v | 0x80)); + v >>>= 7; + if ((v & ~0x7F) == 0) { + stream.write((byte) v); + return; + } + stream.write((byte) (v | 0x80)); + v >>>= 7; + if ((v & ~0x7F) == 0) { + stream.write((byte) v); + return; + } + stream.write((byte) (v | 0x80)); + v >>>= 7; + if ((v & ~0x7F) == 0) { + stream.write((byte) v); + return; + } + stream.write((byte) (v | 0x80)); + v >>>= 7; + if ((v & ~0x7F) == 0) { + stream.write((byte) v); + return; + } + stream.write((byte) (v | 0x80)); + v >>>= 7; + if ((v & ~0x7F) == 0) { + stream.write((byte) v); + return; + } + stream.write((byte) (v | 0x80)); + v >>>= 7; + if ((v & ~0x7F) == 0) { + stream.write((byte) v); + return; + } + stream.write((byte) (v | 0x80)); + v >>>= 7; + if ((v & ~0x7F) == 0) { + stream.write((byte) v); + return; + } + stream.write((byte) (v | 0x80)); + v >>>= 7; + if ((v & ~0x7F) == 0) { + stream.write((byte) v); + return; + } + stream.write((byte) (v | 0x80)); + v >>>= 7; + stream.write((byte) (v)); + } + + @Benchmark + public void encodeDoLoopBlackhole(Longs input, BlackholeOutput output) throws IOException { + for (long l : input.values) { + encodeDoLoop(l, output.stream); + } + } + + @Benchmark + public void encodeDoLoopByteString(Longs input, ByteStringOutput output) throws IOException { + for (long l : input.values) { + encodeDoLoop(l, output.stream); + } + } + + @Benchmark + public void encodeDoLoopTwiddleBlackhole(Longs input, BlackholeOutput output) throws IOException { + for (long l : input.values) { + encodeDoLoopTwiddle(l, output.stream); + } + } + + @Benchmark + public void encodeDoLoopTwiddleByteString(Longs input, ByteStringOutput output) + throws IOException { + for (long l : input.values) { + encodeDoLoopTwiddle(l, output.stream); + } + } + + @Benchmark + public void encodeLoopBlackhole(Longs input, BlackholeOutput output) throws IOException { + for (long l : input.values) { + encodeLoop(l, output.stream); + } + } + + @Benchmark + public void encodeLoopByteString(Longs input, ByteStringOutput output) throws IOException { + for (long l : input.values) { + encodeLoop(l, output.stream); + } + } + + @Benchmark + public void encodeUnrolledBlackhole(Longs input, BlackholeOutput output) throws IOException { + for (long l : input.values) { + encodeUnrolled(l, output.stream); + } + } + + @Benchmark + public void 
encodeUnrolledByteString(Longs input, ByteStringOutput output) throws IOException { + for (long l : input.values) { + encodeUnrolled(l, output.stream); + } + } + + @Benchmark + public void singleByteEncodeDoLoopByteString(Bytes input, ByteStringOutput output) + throws IOException { + for (long l : input.values) { + encodeDoLoop(l, output.stream); + } + } + + @Benchmark + public void singleByteEncodeDoLoopTwiddleByteString(Bytes input, ByteStringOutput output) + throws IOException { + for (long l : input.values) { + encodeDoLoopTwiddle(l, output.stream); + } + } + + @Benchmark + public void singleByteEncodeLoopByteString(Bytes input, ByteStringOutput output) + throws IOException { + for (long l : input.values) { + encodeLoop(l, output.stream); + } + } + + @Benchmark + public void singleByteEncodeUnrolledByteString(Bytes input, ByteStringOutput output) + throws IOException { + for (long l : input.values) { + encodeUnrolled(l, output.stream); + } + } +} diff --git a/sdks/java/fn-execution/src/main/java/org/apache/beam/sdk/fn/CancellableQueue.java b/sdks/java/core/src/main/java/org/apache/beam/sdk/fn/CancellableQueue.java similarity index 100% rename from sdks/java/fn-execution/src/main/java/org/apache/beam/sdk/fn/CancellableQueue.java rename to sdks/java/core/src/main/java/org/apache/beam/sdk/fn/CancellableQueue.java diff --git a/sdks/java/fn-execution/src/main/java/org/apache/beam/sdk/fn/IdGenerator.java b/sdks/java/core/src/main/java/org/apache/beam/sdk/fn/IdGenerator.java similarity index 100% rename from sdks/java/fn-execution/src/main/java/org/apache/beam/sdk/fn/IdGenerator.java rename to sdks/java/core/src/main/java/org/apache/beam/sdk/fn/IdGenerator.java diff --git a/sdks/java/fn-execution/src/main/java/org/apache/beam/sdk/fn/IdGenerators.java b/sdks/java/core/src/main/java/org/apache/beam/sdk/fn/IdGenerators.java similarity index 100% rename from sdks/java/fn-execution/src/main/java/org/apache/beam/sdk/fn/IdGenerators.java rename to sdks/java/core/src/main/java/org/apache/beam/sdk/fn/IdGenerators.java diff --git a/sdks/java/fn-execution/src/main/java/org/apache/beam/sdk/fn/JvmInitializers.java b/sdks/java/core/src/main/java/org/apache/beam/sdk/fn/JvmInitializers.java similarity index 100% rename from sdks/java/fn-execution/src/main/java/org/apache/beam/sdk/fn/JvmInitializers.java rename to sdks/java/core/src/main/java/org/apache/beam/sdk/fn/JvmInitializers.java diff --git a/sdks/java/fn-execution/src/main/java/org/apache/beam/sdk/fn/channel/AddHarnessIdInterceptor.java b/sdks/java/core/src/main/java/org/apache/beam/sdk/fn/channel/AddHarnessIdInterceptor.java similarity index 100% rename from sdks/java/fn-execution/src/main/java/org/apache/beam/sdk/fn/channel/AddHarnessIdInterceptor.java rename to sdks/java/core/src/main/java/org/apache/beam/sdk/fn/channel/AddHarnessIdInterceptor.java diff --git a/sdks/java/fn-execution/src/main/java/org/apache/beam/sdk/fn/channel/ManagedChannelFactory.java b/sdks/java/core/src/main/java/org/apache/beam/sdk/fn/channel/ManagedChannelFactory.java similarity index 100% rename from sdks/java/fn-execution/src/main/java/org/apache/beam/sdk/fn/channel/ManagedChannelFactory.java rename to sdks/java/core/src/main/java/org/apache/beam/sdk/fn/channel/ManagedChannelFactory.java diff --git a/sdks/java/fn-execution/src/main/java/org/apache/beam/sdk/fn/channel/SocketAddressFactory.java b/sdks/java/core/src/main/java/org/apache/beam/sdk/fn/channel/SocketAddressFactory.java similarity index 100% rename from 
sdks/java/fn-execution/src/main/java/org/apache/beam/sdk/fn/channel/SocketAddressFactory.java rename to sdks/java/core/src/main/java/org/apache/beam/sdk/fn/channel/SocketAddressFactory.java diff --git a/sdks/java/fn-execution/src/main/java/org/apache/beam/sdk/fn/channel/package-info.java b/sdks/java/core/src/main/java/org/apache/beam/sdk/fn/channel/package-info.java similarity index 100% rename from sdks/java/fn-execution/src/main/java/org/apache/beam/sdk/fn/channel/package-info.java rename to sdks/java/core/src/main/java/org/apache/beam/sdk/fn/channel/package-info.java diff --git a/sdks/java/fn-execution/src/main/java/org/apache/beam/sdk/fn/data/BeamFnDataGrpcMultiplexer.java b/sdks/java/core/src/main/java/org/apache/beam/sdk/fn/data/BeamFnDataGrpcMultiplexer.java similarity index 100% rename from sdks/java/fn-execution/src/main/java/org/apache/beam/sdk/fn/data/BeamFnDataGrpcMultiplexer.java rename to sdks/java/core/src/main/java/org/apache/beam/sdk/fn/data/BeamFnDataGrpcMultiplexer.java diff --git a/sdks/java/fn-execution/src/main/java/org/apache/beam/sdk/fn/data/BeamFnDataInboundObserver.java b/sdks/java/core/src/main/java/org/apache/beam/sdk/fn/data/BeamFnDataInboundObserver.java similarity index 100% rename from sdks/java/fn-execution/src/main/java/org/apache/beam/sdk/fn/data/BeamFnDataInboundObserver.java rename to sdks/java/core/src/main/java/org/apache/beam/sdk/fn/data/BeamFnDataInboundObserver.java diff --git a/sdks/java/fn-execution/src/main/java/org/apache/beam/sdk/fn/data/BeamFnDataOutboundAggregator.java b/sdks/java/core/src/main/java/org/apache/beam/sdk/fn/data/BeamFnDataOutboundAggregator.java similarity index 100% rename from sdks/java/fn-execution/src/main/java/org/apache/beam/sdk/fn/data/BeamFnDataOutboundAggregator.java rename to sdks/java/core/src/main/java/org/apache/beam/sdk/fn/data/BeamFnDataOutboundAggregator.java diff --git a/sdks/java/fn-execution/src/main/java/org/apache/beam/sdk/fn/data/CloseableFnDataReceiver.java b/sdks/java/core/src/main/java/org/apache/beam/sdk/fn/data/CloseableFnDataReceiver.java similarity index 100% rename from sdks/java/fn-execution/src/main/java/org/apache/beam/sdk/fn/data/CloseableFnDataReceiver.java rename to sdks/java/core/src/main/java/org/apache/beam/sdk/fn/data/CloseableFnDataReceiver.java diff --git a/sdks/java/fn-execution/src/main/java/org/apache/beam/sdk/fn/data/DataEndpoint.java b/sdks/java/core/src/main/java/org/apache/beam/sdk/fn/data/DataEndpoint.java similarity index 100% rename from sdks/java/fn-execution/src/main/java/org/apache/beam/sdk/fn/data/DataEndpoint.java rename to sdks/java/core/src/main/java/org/apache/beam/sdk/fn/data/DataEndpoint.java diff --git a/sdks/java/fn-execution/src/main/java/org/apache/beam/sdk/fn/data/DecodingFnDataReceiver.java b/sdks/java/core/src/main/java/org/apache/beam/sdk/fn/data/DecodingFnDataReceiver.java similarity index 100% rename from sdks/java/fn-execution/src/main/java/org/apache/beam/sdk/fn/data/DecodingFnDataReceiver.java rename to sdks/java/core/src/main/java/org/apache/beam/sdk/fn/data/DecodingFnDataReceiver.java diff --git a/sdks/java/fn-execution/src/main/java/org/apache/beam/sdk/fn/data/FnDataReceiver.java b/sdks/java/core/src/main/java/org/apache/beam/sdk/fn/data/FnDataReceiver.java similarity index 100% rename from sdks/java/fn-execution/src/main/java/org/apache/beam/sdk/fn/data/FnDataReceiver.java rename to sdks/java/core/src/main/java/org/apache/beam/sdk/fn/data/FnDataReceiver.java diff --git 
a/sdks/java/fn-execution/src/main/java/org/apache/beam/sdk/fn/data/LogicalEndpoint.java b/sdks/java/core/src/main/java/org/apache/beam/sdk/fn/data/LogicalEndpoint.java similarity index 100% rename from sdks/java/fn-execution/src/main/java/org/apache/beam/sdk/fn/data/LogicalEndpoint.java rename to sdks/java/core/src/main/java/org/apache/beam/sdk/fn/data/LogicalEndpoint.java diff --git a/sdks/java/fn-execution/src/main/java/org/apache/beam/sdk/fn/data/RemoteGrpcPortRead.java b/sdks/java/core/src/main/java/org/apache/beam/sdk/fn/data/RemoteGrpcPortRead.java similarity index 100% rename from sdks/java/fn-execution/src/main/java/org/apache/beam/sdk/fn/data/RemoteGrpcPortRead.java rename to sdks/java/core/src/main/java/org/apache/beam/sdk/fn/data/RemoteGrpcPortRead.java diff --git a/sdks/java/fn-execution/src/main/java/org/apache/beam/sdk/fn/data/RemoteGrpcPortWrite.java b/sdks/java/core/src/main/java/org/apache/beam/sdk/fn/data/RemoteGrpcPortWrite.java similarity index 100% rename from sdks/java/fn-execution/src/main/java/org/apache/beam/sdk/fn/data/RemoteGrpcPortWrite.java rename to sdks/java/core/src/main/java/org/apache/beam/sdk/fn/data/RemoteGrpcPortWrite.java diff --git a/sdks/java/fn-execution/src/main/java/org/apache/beam/sdk/fn/data/TimerEndpoint.java b/sdks/java/core/src/main/java/org/apache/beam/sdk/fn/data/TimerEndpoint.java similarity index 100% rename from sdks/java/fn-execution/src/main/java/org/apache/beam/sdk/fn/data/TimerEndpoint.java rename to sdks/java/core/src/main/java/org/apache/beam/sdk/fn/data/TimerEndpoint.java diff --git a/sdks/java/fn-execution/src/main/java/org/apache/beam/sdk/fn/data/WeightedList.java b/sdks/java/core/src/main/java/org/apache/beam/sdk/fn/data/WeightedList.java similarity index 100% rename from sdks/java/fn-execution/src/main/java/org/apache/beam/sdk/fn/data/WeightedList.java rename to sdks/java/core/src/main/java/org/apache/beam/sdk/fn/data/WeightedList.java diff --git a/sdks/java/fn-execution/src/main/java/org/apache/beam/sdk/fn/data/package-info.java b/sdks/java/core/src/main/java/org/apache/beam/sdk/fn/data/package-info.java similarity index 100% rename from sdks/java/fn-execution/src/main/java/org/apache/beam/sdk/fn/data/package-info.java rename to sdks/java/core/src/main/java/org/apache/beam/sdk/fn/data/package-info.java diff --git a/sdks/java/fn-execution/src/main/java/org/apache/beam/sdk/fn/package-info.java b/sdks/java/core/src/main/java/org/apache/beam/sdk/fn/package-info.java similarity index 100% rename from sdks/java/fn-execution/src/main/java/org/apache/beam/sdk/fn/package-info.java rename to sdks/java/core/src/main/java/org/apache/beam/sdk/fn/package-info.java diff --git a/sdks/java/fn-execution/src/main/java/org/apache/beam/sdk/fn/server/FnService.java b/sdks/java/core/src/main/java/org/apache/beam/sdk/fn/server/FnService.java similarity index 100% rename from sdks/java/fn-execution/src/main/java/org/apache/beam/sdk/fn/server/FnService.java rename to sdks/java/core/src/main/java/org/apache/beam/sdk/fn/server/FnService.java diff --git a/sdks/java/fn-execution/src/main/java/org/apache/beam/sdk/fn/server/GrpcContextHeaderAccessorProvider.java b/sdks/java/core/src/main/java/org/apache/beam/sdk/fn/server/GrpcContextHeaderAccessorProvider.java similarity index 100% rename from sdks/java/fn-execution/src/main/java/org/apache/beam/sdk/fn/server/GrpcContextHeaderAccessorProvider.java rename to sdks/java/core/src/main/java/org/apache/beam/sdk/fn/server/GrpcContextHeaderAccessorProvider.java diff --git 
a/sdks/java/fn-execution/src/main/java/org/apache/beam/sdk/fn/server/GrpcFnServer.java b/sdks/java/core/src/main/java/org/apache/beam/sdk/fn/server/GrpcFnServer.java similarity index 100% rename from sdks/java/fn-execution/src/main/java/org/apache/beam/sdk/fn/server/GrpcFnServer.java rename to sdks/java/core/src/main/java/org/apache/beam/sdk/fn/server/GrpcFnServer.java diff --git a/sdks/java/fn-execution/src/main/java/org/apache/beam/sdk/fn/server/HeaderAccessor.java b/sdks/java/core/src/main/java/org/apache/beam/sdk/fn/server/HeaderAccessor.java similarity index 100% rename from sdks/java/fn-execution/src/main/java/org/apache/beam/sdk/fn/server/HeaderAccessor.java rename to sdks/java/core/src/main/java/org/apache/beam/sdk/fn/server/HeaderAccessor.java diff --git a/sdks/java/fn-execution/src/main/java/org/apache/beam/sdk/fn/server/InProcessServerFactory.java b/sdks/java/core/src/main/java/org/apache/beam/sdk/fn/server/InProcessServerFactory.java similarity index 100% rename from sdks/java/fn-execution/src/main/java/org/apache/beam/sdk/fn/server/InProcessServerFactory.java rename to sdks/java/core/src/main/java/org/apache/beam/sdk/fn/server/InProcessServerFactory.java diff --git a/sdks/java/fn-execution/src/main/java/org/apache/beam/sdk/fn/server/ServerFactory.java b/sdks/java/core/src/main/java/org/apache/beam/sdk/fn/server/ServerFactory.java similarity index 100% rename from sdks/java/fn-execution/src/main/java/org/apache/beam/sdk/fn/server/ServerFactory.java rename to sdks/java/core/src/main/java/org/apache/beam/sdk/fn/server/ServerFactory.java diff --git a/sdks/java/fn-execution/src/main/java/org/apache/beam/sdk/fn/server/package-info.java b/sdks/java/core/src/main/java/org/apache/beam/sdk/fn/server/package-info.java similarity index 100% rename from sdks/java/fn-execution/src/main/java/org/apache/beam/sdk/fn/server/package-info.java rename to sdks/java/core/src/main/java/org/apache/beam/sdk/fn/server/package-info.java diff --git a/sdks/java/fn-execution/src/main/java/org/apache/beam/sdk/fn/splittabledofn/RestrictionTrackers.java b/sdks/java/core/src/main/java/org/apache/beam/sdk/fn/splittabledofn/RestrictionTrackers.java similarity index 100% rename from sdks/java/fn-execution/src/main/java/org/apache/beam/sdk/fn/splittabledofn/RestrictionTrackers.java rename to sdks/java/core/src/main/java/org/apache/beam/sdk/fn/splittabledofn/RestrictionTrackers.java diff --git a/sdks/java/fn-execution/src/main/java/org/apache/beam/sdk/fn/splittabledofn/WatermarkEstimators.java b/sdks/java/core/src/main/java/org/apache/beam/sdk/fn/splittabledofn/WatermarkEstimators.java similarity index 100% rename from sdks/java/fn-execution/src/main/java/org/apache/beam/sdk/fn/splittabledofn/WatermarkEstimators.java rename to sdks/java/core/src/main/java/org/apache/beam/sdk/fn/splittabledofn/WatermarkEstimators.java diff --git a/sdks/java/fn-execution/src/main/java/org/apache/beam/sdk/fn/splittabledofn/package-info.java b/sdks/java/core/src/main/java/org/apache/beam/sdk/fn/splittabledofn/package-info.java similarity index 100% rename from sdks/java/fn-execution/src/main/java/org/apache/beam/sdk/fn/splittabledofn/package-info.java rename to sdks/java/core/src/main/java/org/apache/beam/sdk/fn/splittabledofn/package-info.java diff --git a/sdks/java/fn-execution/src/main/java/org/apache/beam/sdk/fn/stream/AdvancingPhaser.java b/sdks/java/core/src/main/java/org/apache/beam/sdk/fn/stream/AdvancingPhaser.java similarity index 100% rename from 
sdks/java/fn-execution/src/main/java/org/apache/beam/sdk/fn/stream/AdvancingPhaser.java rename to sdks/java/core/src/main/java/org/apache/beam/sdk/fn/stream/AdvancingPhaser.java diff --git a/sdks/java/fn-execution/src/main/java/org/apache/beam/sdk/fn/stream/BufferingStreamObserver.java b/sdks/java/core/src/main/java/org/apache/beam/sdk/fn/stream/BufferingStreamObserver.java similarity index 100% rename from sdks/java/fn-execution/src/main/java/org/apache/beam/sdk/fn/stream/BufferingStreamObserver.java rename to sdks/java/core/src/main/java/org/apache/beam/sdk/fn/stream/BufferingStreamObserver.java diff --git a/sdks/java/fn-execution/src/main/java/org/apache/beam/sdk/fn/stream/DataStreams.java b/sdks/java/core/src/main/java/org/apache/beam/sdk/fn/stream/DataStreams.java similarity index 100% rename from sdks/java/fn-execution/src/main/java/org/apache/beam/sdk/fn/stream/DataStreams.java rename to sdks/java/core/src/main/java/org/apache/beam/sdk/fn/stream/DataStreams.java diff --git a/sdks/java/fn-execution/src/main/java/org/apache/beam/sdk/fn/stream/DirectStreamObserver.java b/sdks/java/core/src/main/java/org/apache/beam/sdk/fn/stream/DirectStreamObserver.java similarity index 100% rename from sdks/java/fn-execution/src/main/java/org/apache/beam/sdk/fn/stream/DirectStreamObserver.java rename to sdks/java/core/src/main/java/org/apache/beam/sdk/fn/stream/DirectStreamObserver.java diff --git a/sdks/java/fn-execution/src/main/java/org/apache/beam/sdk/fn/stream/ForwardingClientResponseObserver.java b/sdks/java/core/src/main/java/org/apache/beam/sdk/fn/stream/ForwardingClientResponseObserver.java similarity index 100% rename from sdks/java/fn-execution/src/main/java/org/apache/beam/sdk/fn/stream/ForwardingClientResponseObserver.java rename to sdks/java/core/src/main/java/org/apache/beam/sdk/fn/stream/ForwardingClientResponseObserver.java diff --git a/sdks/java/fn-execution/src/main/java/org/apache/beam/sdk/fn/stream/OutboundObserverFactory.java b/sdks/java/core/src/main/java/org/apache/beam/sdk/fn/stream/OutboundObserverFactory.java similarity index 100% rename from sdks/java/fn-execution/src/main/java/org/apache/beam/sdk/fn/stream/OutboundObserverFactory.java rename to sdks/java/core/src/main/java/org/apache/beam/sdk/fn/stream/OutboundObserverFactory.java diff --git a/sdks/java/fn-execution/src/main/java/org/apache/beam/sdk/fn/stream/PrefetchableIterable.java b/sdks/java/core/src/main/java/org/apache/beam/sdk/fn/stream/PrefetchableIterable.java similarity index 100% rename from sdks/java/fn-execution/src/main/java/org/apache/beam/sdk/fn/stream/PrefetchableIterable.java rename to sdks/java/core/src/main/java/org/apache/beam/sdk/fn/stream/PrefetchableIterable.java diff --git a/sdks/java/fn-execution/src/main/java/org/apache/beam/sdk/fn/stream/PrefetchableIterables.java b/sdks/java/core/src/main/java/org/apache/beam/sdk/fn/stream/PrefetchableIterables.java similarity index 100% rename from sdks/java/fn-execution/src/main/java/org/apache/beam/sdk/fn/stream/PrefetchableIterables.java rename to sdks/java/core/src/main/java/org/apache/beam/sdk/fn/stream/PrefetchableIterables.java diff --git a/sdks/java/fn-execution/src/main/java/org/apache/beam/sdk/fn/stream/PrefetchableIterator.java b/sdks/java/core/src/main/java/org/apache/beam/sdk/fn/stream/PrefetchableIterator.java similarity index 100% rename from sdks/java/fn-execution/src/main/java/org/apache/beam/sdk/fn/stream/PrefetchableIterator.java rename to sdks/java/core/src/main/java/org/apache/beam/sdk/fn/stream/PrefetchableIterator.java diff --git 
a/sdks/java/fn-execution/src/main/java/org/apache/beam/sdk/fn/stream/PrefetchableIterators.java b/sdks/java/core/src/main/java/org/apache/beam/sdk/fn/stream/PrefetchableIterators.java similarity index 100% rename from sdks/java/fn-execution/src/main/java/org/apache/beam/sdk/fn/stream/PrefetchableIterators.java rename to sdks/java/core/src/main/java/org/apache/beam/sdk/fn/stream/PrefetchableIterators.java diff --git a/sdks/java/fn-execution/src/main/java/org/apache/beam/sdk/fn/stream/SynchronizedStreamObserver.java b/sdks/java/core/src/main/java/org/apache/beam/sdk/fn/stream/SynchronizedStreamObserver.java similarity index 100% rename from sdks/java/fn-execution/src/main/java/org/apache/beam/sdk/fn/stream/SynchronizedStreamObserver.java rename to sdks/java/core/src/main/java/org/apache/beam/sdk/fn/stream/SynchronizedStreamObserver.java diff --git a/sdks/java/fn-execution/src/main/java/org/apache/beam/sdk/fn/stream/package-info.java b/sdks/java/core/src/main/java/org/apache/beam/sdk/fn/stream/package-info.java similarity index 100% rename from sdks/java/fn-execution/src/main/java/org/apache/beam/sdk/fn/stream/package-info.java rename to sdks/java/core/src/main/java/org/apache/beam/sdk/fn/stream/package-info.java diff --git a/sdks/java/fn-execution/src/main/java/org/apache/beam/sdk/fn/test/TestExecutors.java b/sdks/java/core/src/main/java/org/apache/beam/sdk/fn/test/TestExecutors.java similarity index 100% rename from sdks/java/fn-execution/src/main/java/org/apache/beam/sdk/fn/test/TestExecutors.java rename to sdks/java/core/src/main/java/org/apache/beam/sdk/fn/test/TestExecutors.java diff --git a/sdks/java/fn-execution/src/main/java/org/apache/beam/sdk/fn/test/TestStreams.java b/sdks/java/core/src/main/java/org/apache/beam/sdk/fn/test/TestStreams.java similarity index 100% rename from sdks/java/fn-execution/src/main/java/org/apache/beam/sdk/fn/test/TestStreams.java rename to sdks/java/core/src/main/java/org/apache/beam/sdk/fn/test/TestStreams.java diff --git a/sdks/java/fn-execution/src/main/java/org/apache/beam/sdk/fn/test/package-info.java b/sdks/java/core/src/main/java/org/apache/beam/sdk/fn/test/package-info.java similarity index 100% rename from sdks/java/fn-execution/src/main/java/org/apache/beam/sdk/fn/test/package-info.java rename to sdks/java/core/src/main/java/org/apache/beam/sdk/fn/test/package-info.java diff --git a/sdks/java/fn-execution/src/main/java/org/apache/beam/sdk/fn/windowing/EncodedBoundedWindow.java b/sdks/java/core/src/main/java/org/apache/beam/sdk/fn/windowing/EncodedBoundedWindow.java similarity index 100% rename from sdks/java/fn-execution/src/main/java/org/apache/beam/sdk/fn/windowing/EncodedBoundedWindow.java rename to sdks/java/core/src/main/java/org/apache/beam/sdk/fn/windowing/EncodedBoundedWindow.java diff --git a/sdks/java/fn-execution/src/main/java/org/apache/beam/sdk/fn/windowing/package-info.java b/sdks/java/core/src/main/java/org/apache/beam/sdk/fn/windowing/package-info.java similarity index 100% rename from sdks/java/fn-execution/src/main/java/org/apache/beam/sdk/fn/windowing/package-info.java rename to sdks/java/core/src/main/java/org/apache/beam/sdk/fn/windowing/package-info.java diff --git a/sdks/java/core/src/main/java/org/apache/beam/sdk/transforms/DoFn.java b/sdks/java/core/src/main/java/org/apache/beam/sdk/transforms/DoFn.java index c22b726c99a2..78ec1a8c840d 100644 --- a/sdks/java/core/src/main/java/org/apache/beam/sdk/transforms/DoFn.java +++ b/sdks/java/core/src/main/java/org/apache/beam/sdk/transforms/DoFn.java @@ -395,11 +395,14 @@ public 
interface OutputReceiver { void outputWithTimestamp(T output, Instant timestamp); - void outputWindowedValue( + default void outputWindowedValue( T output, Instant timestamp, Collection windows, - PaneInfo paneInfo); + PaneInfo paneInfo) { + throw new UnsupportedOperationException( + String.format("Not implemented: %s.outputWindowedValue", this.getClass().getName())); + } } /** Receives tagged output for a multi-output function. */ diff --git a/sdks/java/core/src/main/java/org/apache/beam/sdk/transforms/join/CoGbkResult.java b/sdks/java/core/src/main/java/org/apache/beam/sdk/transforms/join/CoGbkResult.java index ba5bce20d090..ba0430e39daf 100644 --- a/sdks/java/core/src/main/java/org/apache/beam/sdk/transforms/join/CoGbkResult.java +++ b/sdks/java/core/src/main/java/org/apache/beam/sdk/transforms/join/CoGbkResult.java @@ -95,8 +95,9 @@ public CoGbkResult( // according to their tag. final Iterator taggedIter = taggedValues.iterator(); int elementCount = 0; + boolean isReiterator = taggedIter instanceof Reiterator; while (taggedIter.hasNext()) { - if (elementCount++ >= inMemoryElementCount && taggedIter instanceof Reiterator) { + if (isReiterator && elementCount++ >= inMemoryElementCount) { // Let the tails be lazy. break; } diff --git a/sdks/java/core/src/main/java/org/apache/beam/sdk/util/VarInt.java b/sdks/java/core/src/main/java/org/apache/beam/sdk/util/VarInt.java index 239cbafc1743..5432383f5ad4 100644 --- a/sdks/java/core/src/main/java/org/apache/beam/sdk/util/VarInt.java +++ b/sdks/java/core/src/main/java/org/apache/beam/sdk/util/VarInt.java @@ -42,13 +42,61 @@ public static void encode(int v, OutputStream stream) throws IOException { /** Encodes the given value onto the stream. */ public static void encode(long v, OutputStream stream) throws IOException { - do { - // Encode next 7 bits + terminator bit - long bits = v & 0x7F; - v >>>= 7; - byte b = (byte) (bits | ((v != 0) ? 0x80 : 0)); - stream.write(b); - } while (v != 0); + if ((v & ~0x7F) == 0) { + stream.write((byte) v); + return; + } + stream.write((byte) (v | 0x80)); + v >>>= 7; + if ((v & ~0x7F) == 0) { + stream.write((byte) v); + return; + } + stream.write((byte) (v | 0x80)); + v >>>= 7; + if ((v & ~0x7F) == 0) { + stream.write((byte) v); + return; + } + stream.write((byte) (v | 0x80)); + v >>>= 7; + if ((v & ~0x7F) == 0) { + stream.write((byte) v); + return; + } + stream.write((byte) (v | 0x80)); + v >>>= 7; + if ((v & ~0x7F) == 0) { + stream.write((byte) v); + return; + } + stream.write((byte) (v | 0x80)); + v >>>= 7; + if ((v & ~0x7F) == 0) { + stream.write((byte) v); + return; + } + stream.write((byte) (v | 0x80)); + v >>>= 7; + if ((v & ~0x7F) == 0) { + stream.write((byte) v); + return; + } + stream.write((byte) (v | 0x80)); + v >>>= 7; + if ((v & ~0x7F) == 0) { + stream.write((byte) v); + return; + } + stream.write((byte) (v | 0x80)); + v >>>= 7; + if ((v & ~0x7F) == 0) { + stream.write((byte) v); + return; + } + stream.write((byte) (v | 0x80)); + v >>>= 7; + stream.write((byte) (v)); } /** Decodes an integer value from the given stream. 
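The unrolled encoder above keeps the same wire format as the loop it replaces: each output byte carries the next seven low-order bits of the value, with the high bit set on every byte except the last. A minimal round-trip sketch, assuming the companion decodeLong method of this VarInt class:

import java.io.ByteArrayInputStream;
import java.io.ByteArrayOutputStream;
import org.apache.beam.sdk.util.VarInt;

public class VarIntRoundTrip {
  public static void main(String[] args) throws Exception {
    ByteArrayOutputStream out = new ByteArrayOutputStream();
    VarInt.encode(300L, out);
    // 300 = 0b1_0010_1100 encodes as 0xAC 0x02: low seven bits first, continuation bit on byte one.
    byte[] encoded = out.toByteArray();
    long decoded = VarInt.decodeLong(new ByteArrayInputStream(encoded));
    System.out.println(decoded); // prints 300
  }
}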
*/ diff --git a/sdks/java/fn-execution/src/test/java/org/apache/beam/sdk/fn/CancellableQueueTest.java b/sdks/java/core/src/test/java/org/apache/beam/sdk/fn/CancellableQueueTest.java similarity index 100% rename from sdks/java/fn-execution/src/test/java/org/apache/beam/sdk/fn/CancellableQueueTest.java rename to sdks/java/core/src/test/java/org/apache/beam/sdk/fn/CancellableQueueTest.java diff --git a/sdks/java/fn-execution/src/test/java/org/apache/beam/sdk/fn/IdGeneratorsTest.java b/sdks/java/core/src/test/java/org/apache/beam/sdk/fn/IdGeneratorsTest.java similarity index 100% rename from sdks/java/fn-execution/src/test/java/org/apache/beam/sdk/fn/IdGeneratorsTest.java rename to sdks/java/core/src/test/java/org/apache/beam/sdk/fn/IdGeneratorsTest.java diff --git a/sdks/java/fn-execution/src/test/java/org/apache/beam/sdk/fn/JvmInitializersTest.java b/sdks/java/core/src/test/java/org/apache/beam/sdk/fn/JvmInitializersTest.java similarity index 95% rename from sdks/java/fn-execution/src/test/java/org/apache/beam/sdk/fn/JvmInitializersTest.java rename to sdks/java/core/src/test/java/org/apache/beam/sdk/fn/JvmInitializersTest.java index e1da00ae94dd..7820467987ef 100644 --- a/sdks/java/fn-execution/src/test/java/org/apache/beam/sdk/fn/JvmInitializersTest.java +++ b/sdks/java/core/src/test/java/org/apache/beam/sdk/fn/JvmInitializersTest.java @@ -17,7 +17,6 @@ */ package org.apache.beam.sdk.fn; -import static org.hamcrest.MatcherAssert.assertThat; import static org.hamcrest.core.StringContains.containsString; import static org.junit.Assert.assertEquals; import static org.junit.Assert.assertTrue; @@ -27,6 +26,7 @@ import org.apache.beam.sdk.options.PipelineOptions; import org.apache.beam.sdk.testing.ExpectedLogs; import org.apache.beam.sdk.testing.TestPipeline; +import org.hamcrest.MatcherAssert; import org.junit.Before; import org.junit.Rule; import org.junit.Test; @@ -72,7 +72,8 @@ public void runOnStartup_runsInitializers() { JvmInitializers.runOnStartup(); assertTrue(onStartupRan); - assertThat(systemOutRule.getLog(), containsString("Running JvmInitializer#onStartup")); + MatcherAssert.assertThat( + systemOutRule.getLog(), containsString("Running JvmInitializer#onStartup")); } @Test diff --git a/sdks/java/fn-execution/src/test/java/org/apache/beam/sdk/fn/channel/ManagedChannelFactoryTest.java b/sdks/java/core/src/test/java/org/apache/beam/sdk/fn/channel/ManagedChannelFactoryTest.java similarity index 100% rename from sdks/java/fn-execution/src/test/java/org/apache/beam/sdk/fn/channel/ManagedChannelFactoryTest.java rename to sdks/java/core/src/test/java/org/apache/beam/sdk/fn/channel/ManagedChannelFactoryTest.java diff --git a/sdks/java/fn-execution/src/test/java/org/apache/beam/sdk/fn/channel/SocketAddressFactoryTest.java b/sdks/java/core/src/test/java/org/apache/beam/sdk/fn/channel/SocketAddressFactoryTest.java similarity index 100% rename from sdks/java/fn-execution/src/test/java/org/apache/beam/sdk/fn/channel/SocketAddressFactoryTest.java rename to sdks/java/core/src/test/java/org/apache/beam/sdk/fn/channel/SocketAddressFactoryTest.java diff --git a/sdks/java/fn-execution/src/test/java/org/apache/beam/sdk/fn/data/BeamFnDataGrpcMultiplexerTest.java b/sdks/java/core/src/test/java/org/apache/beam/sdk/fn/data/BeamFnDataGrpcMultiplexerTest.java similarity index 100% rename from sdks/java/fn-execution/src/test/java/org/apache/beam/sdk/fn/data/BeamFnDataGrpcMultiplexerTest.java rename to sdks/java/core/src/test/java/org/apache/beam/sdk/fn/data/BeamFnDataGrpcMultiplexerTest.java diff --git 
a/sdks/java/fn-execution/src/test/java/org/apache/beam/sdk/fn/data/BeamFnDataInboundObserverTest.java b/sdks/java/core/src/test/java/org/apache/beam/sdk/fn/data/BeamFnDataInboundObserverTest.java similarity index 95% rename from sdks/java/fn-execution/src/test/java/org/apache/beam/sdk/fn/data/BeamFnDataInboundObserverTest.java rename to sdks/java/core/src/test/java/org/apache/beam/sdk/fn/data/BeamFnDataInboundObserverTest.java index 137c2f890fb5..bd279cee208a 100644 --- a/sdks/java/fn-execution/src/test/java/org/apache/beam/sdk/fn/data/BeamFnDataInboundObserverTest.java +++ b/sdks/java/core/src/test/java/org/apache/beam/sdk/fn/data/BeamFnDataInboundObserverTest.java @@ -17,7 +17,6 @@ */ package org.apache.beam.sdk.fn.data; -import static org.apache.beam.sdk.util.WindowedValue.valueInGlobalWindow; import static org.hamcrest.MatcherAssert.assertThat; import static org.hamcrest.Matchers.contains; import static org.junit.Assert.assertSame; @@ -94,8 +93,13 @@ public void testConsumptionOfValuesHappensOnAwaitCompletionCallersThread() throw assertThat( values, contains( - valueInGlobalWindow("ABC"), valueInGlobalWindow("DEF"), valueInGlobalWindow("GHI"))); - assertThat(timers, contains(valueInGlobalWindow("UVW"), valueInGlobalWindow("XYZ"))); + WindowedValue.valueInGlobalWindow("ABC"), + WindowedValue.valueInGlobalWindow("DEF"), + WindowedValue.valueInGlobalWindow("GHI"))); + assertThat( + timers, + contains( + WindowedValue.valueInGlobalWindow("UVW"), WindowedValue.valueInGlobalWindow("XYZ"))); future.get(); } @@ -205,7 +209,7 @@ public void testBadProducerDataFailureVisibleToAwaitCompletionCallerAndProducer( private BeamFnApi.Elements dataWith(String... values) throws Exception { ByteStringOutputStream output = new ByteStringOutputStream(); for (String value : values) { - CODER.encode(valueInGlobalWindow(value), output); + CODER.encode(WindowedValue.valueInGlobalWindow(value), output); } return BeamFnApi.Elements.newBuilder() .addData( @@ -224,7 +228,7 @@ private BeamFnApi.Elements lastData() throws Exception { private BeamFnApi.Elements timerWith(String... 
values) throws Exception { ByteStringOutputStream output = new ByteStringOutputStream(); for (String value : values) { - CODER.encode(valueInGlobalWindow(value), output); + CODER.encode(WindowedValue.valueInGlobalWindow(value), output); } return BeamFnApi.Elements.newBuilder() .addTimers( diff --git a/sdks/java/fn-execution/src/test/java/org/apache/beam/sdk/fn/data/BeamFnDataOutboundAggregatorTest.java b/sdks/java/core/src/test/java/org/apache/beam/sdk/fn/data/BeamFnDataOutboundAggregatorTest.java similarity index 100% rename from sdks/java/fn-execution/src/test/java/org/apache/beam/sdk/fn/data/BeamFnDataOutboundAggregatorTest.java rename to sdks/java/core/src/test/java/org/apache/beam/sdk/fn/data/BeamFnDataOutboundAggregatorTest.java diff --git a/sdks/java/fn-execution/src/test/java/org/apache/beam/sdk/fn/data/RemoteGrpcPortReadTest.java b/sdks/java/core/src/test/java/org/apache/beam/sdk/fn/data/RemoteGrpcPortReadTest.java similarity index 100% rename from sdks/java/fn-execution/src/test/java/org/apache/beam/sdk/fn/data/RemoteGrpcPortReadTest.java rename to sdks/java/core/src/test/java/org/apache/beam/sdk/fn/data/RemoteGrpcPortReadTest.java diff --git a/sdks/java/fn-execution/src/test/java/org/apache/beam/sdk/fn/data/RemoteGrpcPortWriteTest.java b/sdks/java/core/src/test/java/org/apache/beam/sdk/fn/data/RemoteGrpcPortWriteTest.java similarity index 100% rename from sdks/java/fn-execution/src/test/java/org/apache/beam/sdk/fn/data/RemoteGrpcPortWriteTest.java rename to sdks/java/core/src/test/java/org/apache/beam/sdk/fn/data/RemoteGrpcPortWriteTest.java diff --git a/sdks/java/fn-execution/src/test/java/org/apache/beam/sdk/fn/splittabledofn/RestrictionTrackersTest.java b/sdks/java/core/src/test/java/org/apache/beam/sdk/fn/splittabledofn/RestrictionTrackersTest.java similarity index 100% rename from sdks/java/fn-execution/src/test/java/org/apache/beam/sdk/fn/splittabledofn/RestrictionTrackersTest.java rename to sdks/java/core/src/test/java/org/apache/beam/sdk/fn/splittabledofn/RestrictionTrackersTest.java diff --git a/sdks/java/fn-execution/src/test/java/org/apache/beam/sdk/fn/splittabledofn/WatermarkEstimatorsTest.java b/sdks/java/core/src/test/java/org/apache/beam/sdk/fn/splittabledofn/WatermarkEstimatorsTest.java similarity index 100% rename from sdks/java/fn-execution/src/test/java/org/apache/beam/sdk/fn/splittabledofn/WatermarkEstimatorsTest.java rename to sdks/java/core/src/test/java/org/apache/beam/sdk/fn/splittabledofn/WatermarkEstimatorsTest.java diff --git a/sdks/java/fn-execution/src/test/java/org/apache/beam/sdk/fn/stream/AdvancingPhaserTest.java b/sdks/java/core/src/test/java/org/apache/beam/sdk/fn/stream/AdvancingPhaserTest.java similarity index 100% rename from sdks/java/fn-execution/src/test/java/org/apache/beam/sdk/fn/stream/AdvancingPhaserTest.java rename to sdks/java/core/src/test/java/org/apache/beam/sdk/fn/stream/AdvancingPhaserTest.java diff --git a/sdks/java/fn-execution/src/test/java/org/apache/beam/sdk/fn/stream/BufferingStreamObserverTest.java b/sdks/java/core/src/test/java/org/apache/beam/sdk/fn/stream/BufferingStreamObserverTest.java similarity index 100% rename from sdks/java/fn-execution/src/test/java/org/apache/beam/sdk/fn/stream/BufferingStreamObserverTest.java rename to sdks/java/core/src/test/java/org/apache/beam/sdk/fn/stream/BufferingStreamObserverTest.java diff --git a/sdks/java/fn-execution/src/test/java/org/apache/beam/sdk/fn/stream/DataStreamsTest.java b/sdks/java/core/src/test/java/org/apache/beam/sdk/fn/stream/DataStreamsTest.java similarity index 
100% rename from sdks/java/fn-execution/src/test/java/org/apache/beam/sdk/fn/stream/DataStreamsTest.java rename to sdks/java/core/src/test/java/org/apache/beam/sdk/fn/stream/DataStreamsTest.java diff --git a/sdks/java/fn-execution/src/test/java/org/apache/beam/sdk/fn/stream/DirectStreamObserverTest.java b/sdks/java/core/src/test/java/org/apache/beam/sdk/fn/stream/DirectStreamObserverTest.java similarity index 100% rename from sdks/java/fn-execution/src/test/java/org/apache/beam/sdk/fn/stream/DirectStreamObserverTest.java rename to sdks/java/core/src/test/java/org/apache/beam/sdk/fn/stream/DirectStreamObserverTest.java diff --git a/sdks/java/fn-execution/src/test/java/org/apache/beam/sdk/fn/stream/ForwardingClientResponseObserverTest.java b/sdks/java/core/src/test/java/org/apache/beam/sdk/fn/stream/ForwardingClientResponseObserverTest.java similarity index 100% rename from sdks/java/fn-execution/src/test/java/org/apache/beam/sdk/fn/stream/ForwardingClientResponseObserverTest.java rename to sdks/java/core/src/test/java/org/apache/beam/sdk/fn/stream/ForwardingClientResponseObserverTest.java diff --git a/sdks/java/fn-execution/src/test/java/org/apache/beam/sdk/fn/stream/OutboundObserverFactoryTest.java b/sdks/java/core/src/test/java/org/apache/beam/sdk/fn/stream/OutboundObserverFactoryTest.java similarity index 100% rename from sdks/java/fn-execution/src/test/java/org/apache/beam/sdk/fn/stream/OutboundObserverFactoryTest.java rename to sdks/java/core/src/test/java/org/apache/beam/sdk/fn/stream/OutboundObserverFactoryTest.java diff --git a/sdks/java/fn-execution/src/test/java/org/apache/beam/sdk/fn/stream/PrefetchableIterablesTest.java b/sdks/java/core/src/test/java/org/apache/beam/sdk/fn/stream/PrefetchableIterablesTest.java similarity index 100% rename from sdks/java/fn-execution/src/test/java/org/apache/beam/sdk/fn/stream/PrefetchableIterablesTest.java rename to sdks/java/core/src/test/java/org/apache/beam/sdk/fn/stream/PrefetchableIterablesTest.java diff --git a/sdks/java/fn-execution/src/test/java/org/apache/beam/sdk/fn/stream/PrefetchableIteratorsTest.java b/sdks/java/core/src/test/java/org/apache/beam/sdk/fn/stream/PrefetchableIteratorsTest.java similarity index 100% rename from sdks/java/fn-execution/src/test/java/org/apache/beam/sdk/fn/stream/PrefetchableIteratorsTest.java rename to sdks/java/core/src/test/java/org/apache/beam/sdk/fn/stream/PrefetchableIteratorsTest.java diff --git a/sdks/java/fn-execution/src/test/java/org/apache/beam/sdk/fn/test/TestExecutorsTest.java b/sdks/java/core/src/test/java/org/apache/beam/sdk/fn/test/TestExecutorsTest.java similarity index 100% rename from sdks/java/fn-execution/src/test/java/org/apache/beam/sdk/fn/test/TestExecutorsTest.java rename to sdks/java/core/src/test/java/org/apache/beam/sdk/fn/test/TestExecutorsTest.java diff --git a/sdks/java/fn-execution/src/test/java/org/apache/beam/sdk/fn/test/TestStreamsTest.java b/sdks/java/core/src/test/java/org/apache/beam/sdk/fn/test/TestStreamsTest.java similarity index 100% rename from sdks/java/fn-execution/src/test/java/org/apache/beam/sdk/fn/test/TestStreamsTest.java rename to sdks/java/core/src/test/java/org/apache/beam/sdk/fn/test/TestStreamsTest.java diff --git a/sdks/java/fn-execution/src/test/java/org/apache/beam/sdk/fn/windowing/EncodedBoundedWindowTest.java b/sdks/java/core/src/test/java/org/apache/beam/sdk/fn/windowing/EncodedBoundedWindowTest.java similarity index 100% rename from sdks/java/fn-execution/src/test/java/org/apache/beam/sdk/fn/windowing/EncodedBoundedWindowTest.java rename to 
sdks/java/core/src/test/java/org/apache/beam/sdk/fn/windowing/EncodedBoundedWindowTest.java diff --git a/sdks/java/expansion-service/build.gradle b/sdks/java/expansion-service/build.gradle index 18df128b9386..6947e53354db 100644 --- a/sdks/java/expansion-service/build.gradle +++ b/sdks/java/expansion-service/build.gradle @@ -41,10 +41,8 @@ dependencies { implementation project(path: ":sdks:java:core", configuration: "shadow") implementation project(path: ":runners:core-construction-java") implementation project(path: ":runners:java-fn-execution") - implementation project(path: ":sdks:java:fn-execution") implementation project(path: ":sdks:java:harness") permitUnusedDeclared project(path: ":model:fn-execution") - permitUnusedDeclared project(path: ":sdks:java:fn-execution") implementation library.java.jackson_annotations implementation library.java.jackson_databind implementation library.java.jackson_dataformat_yaml diff --git a/sdks/java/extensions/protobuf/build.gradle b/sdks/java/extensions/protobuf/build.gradle index 1582492c293e..088f5aca63ca 100644 --- a/sdks/java/extensions/protobuf/build.gradle +++ b/sdks/java/extensions/protobuf/build.gradle @@ -38,6 +38,7 @@ dependencies { implementation library.java.commons_compress implementation library.java.slf4j_api implementation project(path: ":sdks:java:core", configuration: "shadow") + implementation("com.google.cloud:google-cloud-storage:2.32.1") implementation library.java.protobuf_java implementation("com.squareup.wire:wire-schema-jvm:4.9.3") implementation("io.apicurio:apicurio-registry-protobuf-schema-utilities:3.0.0.M2") diff --git a/sdks/java/extensions/protobuf/src/main/java/org/apache/beam/sdk/extensions/protobuf/ProtoByteUtils.java b/sdks/java/extensions/protobuf/src/main/java/org/apache/beam/sdk/extensions/protobuf/ProtoByteUtils.java index 02419ec0f619..dd73739246d0 100644 --- a/sdks/java/extensions/protobuf/src/main/java/org/apache/beam/sdk/extensions/protobuf/ProtoByteUtils.java +++ b/sdks/java/extensions/protobuf/src/main/java/org/apache/beam/sdk/extensions/protobuf/ProtoByteUtils.java @@ -20,6 +20,9 @@ import static java.util.stream.Collectors.toList; import static org.apache.beam.vendor.guava.v32_1_2_jre.com.google.common.base.Preconditions.checkArgument; +import com.google.cloud.storage.Blob; +import com.google.cloud.storage.Storage; +import com.google.cloud.storage.StorageOptions; import com.google.protobuf.DescriptorProtos; import com.google.protobuf.Descriptors; import com.google.protobuf.DynamicMessage; @@ -41,15 +44,15 @@ import org.apache.beam.sdk.transforms.SerializableFunction; import org.apache.beam.sdk.transforms.SimpleFunction; import org.apache.beam.sdk.values.Row; +import org.apache.beam.vendor.guava.v32_1_2_jre.com.google.common.base.Splitter; import org.apache.commons.compress.utils.IOUtils; import org.slf4j.Logger; import org.slf4j.LoggerFactory; /** - * Utility class for working with Protocol Buffer (Proto) data in the context of Apache Beam. This - * class provides methods to retrieve Beam Schemas from Proto messages, convert Proto bytes to Beam - * Rows, and vice versa. It also includes utilities for handling Protocol Buffer schemas and related - * file operations. + * Utility class for working with Protocol Buffer (Proto) data. This class provides methods to + * retrieve Beam Schemas from Proto messages, convert Proto bytes to Beam Rows, and vice versa. It + * also includes utilities for handling Protocol Buffer schemas and related file operations. * *

Users can utilize the methods in this class to facilitate the integration of Proto data * processing within Apache Beam pipelines, allowing for the seamless transformation of Proto @@ -105,7 +108,11 @@ private static Descriptors.Descriptor getDescriptorFromProtoSchema( try { Descriptors.FileDescriptor fileDescriptor = FileDescriptorUtils.protoFileToFileDescriptor(result); - return fileDescriptor.findMessageTypeByName(messageName); + + List messageElements = Splitter.on('.').splitToList(messageName); + String messageTypeByName = messageElements.get(messageElements.size() - 1); + + return fileDescriptor.findMessageTypeByName(messageTypeByName); } catch (Descriptors.DescriptorValidationException e) { throw new RuntimeException(e); } @@ -148,10 +155,12 @@ public static SerializableFunction getProtoBytesToRowFunction( @Override public Row apply(byte[] input) { try { + List messageElements = Splitter.on('.').splitToList(messageName); + String messageTypeByName = messageElements.get(messageElements.size() - 1); final Descriptors.Descriptor descriptor = protoDomain .getFileDescriptor(dynamicProtoDomain.getFileName()) - .findMessageTypeByName(messageName); + .findMessageTypeByName(messageTypeByName); DynamicMessage dynamicMessage = DynamicMessage.parseFrom(descriptor, input); SerializableFunction res = protoDynamicMessageSchema.getToRowFunction(); @@ -243,6 +252,41 @@ private static byte[] getFileAsBytes(String fileDescriptorPath) { * @throws RuntimeException if an error occurs while finding or opening the file. */ private static ReadableByteChannel getFileByteChannel(String filePath) { + if (isGcsPath(filePath)) { + return openGcsFile(filePath); + } else { + return openLocalFile(filePath); + } + } + + private static boolean isGcsPath(String filePath) { + return filePath.startsWith("gs://"); + } + + /** + * Opens a ReadableByteChannel for reading from a Google Cloud Storage (GCS) file. + * + * @param filePath The GCS file path (e.g., "gs://your-bucket-name/your-object-name"). + * @return A ReadableByteChannel for reading from the specified GCS file. + */ + private static ReadableByteChannel openGcsFile(String filePath) { + Storage storage = StorageOptions.getDefaultInstance().getService(); + String bucketName = getBucketName(filePath); + String objectName = getObjectName(filePath); + Blob blob = storage.get(bucketName, objectName); + return blob.reader(); + } + + /** + * Opens a ReadableByteChannel for reading from a local file using the Apache Beam FileSystems + * API. + * + * @param filePath The local file path. + * @return A ReadableByteChannel for reading from the specified local file. + * @throws IllegalArgumentException If no files match the specified pattern or if more than one + * file matches. + */ + private static ReadableByteChannel openLocalFile(String filePath) { try { MatchResult result = FileSystems.match(filePath); checkArgument( @@ -259,6 +303,29 @@ private static ReadableByteChannel getFileByteChannel(String filePath) { } } + /** + * Extracts the bucket name from a Google Cloud Storage (GCS) file path. + * + * @param gcsPath The GCS file path (e.g., "gs://your-bucket-name/your-object-name"). + * @return The bucket name extracted from the GCS path. + */ + private static String getBucketName(String gcsPath) { + int startIndex = "gs://".length(); + int endIndex = gcsPath.indexOf('/', startIndex); + return gcsPath.substring(startIndex, endIndex); + } + + /** + * Extracts the object name from a Google Cloud Storage (GCS) file path. 
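The GCS branch added here parses the gs:// URI by hand: the text between "gs://" and the first "/" is the bucket, and the remainder is the object name. A standalone sketch of that split, using an illustrative path:

public class GcsPathSplit {
  public static void main(String[] args) {
    String gcsPath = "gs://example-bucket/schemas/my_descriptor.pb"; // illustrative path
    int bucketStart = "gs://".length();
    int bucketEnd = gcsPath.indexOf('/', bucketStart);
    String bucket = gcsPath.substring(bucketStart, bucketEnd); // "example-bucket"
    String object = gcsPath.substring(bucketEnd + 1);          // "schemas/my_descriptor.pb"
    System.out.println(bucket + " | " + object);
  }
}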
+ * + * @param gcsPath The GCS file path (e.g., "gs://your-bucket-name/your-object-name"). + * @return The object name extracted from the GCS path. + */ + private static String getObjectName(String gcsPath) { + int startIndex = gcsPath.indexOf('/', "gs://".length()) + 1; + return gcsPath.substring(startIndex); + } + /** * Represents metadata associated with a Protocol Buffer schema, including the File Name and * ProtoDomain. diff --git a/sdks/java/extensions/protobuf/src/test/java/org/apache/beam/sdk/extensions/protobuf/ProtoByteUtilsTest.java b/sdks/java/extensions/protobuf/src/test/java/org/apache/beam/sdk/extensions/protobuf/ProtoByteUtilsTest.java index 04bcde6a0fe0..6105208d8366 100644 --- a/sdks/java/extensions/protobuf/src/test/java/org/apache/beam/sdk/extensions/protobuf/ProtoByteUtilsTest.java +++ b/sdks/java/extensions/protobuf/src/test/java/org/apache/beam/sdk/extensions/protobuf/ProtoByteUtilsTest.java @@ -48,6 +48,26 @@ public class ProtoByteUtilsTest { + " Address address = 4;\n" + "}"; + private static final String PROTO_STRING_PACKAGE_SCHEMA = + "syntax = \"proto3\";\n" + + "package com.test.proto;" + + "\n" + + "message MyMessage {\n" + + " int32 id = 1;\n" + + " string name = 2;\n" + + " bool active = 3;\n" + + "\n" + + " // Nested field\n" + + " message Address {\n" + + " string street = 1;\n" + + " string city = 2;\n" + + " string state = 3;\n" + + " string zip_code = 4;\n" + + " }\n" + + "\n" + + " Address address = 4;\n" + + "}"; + private static final String DESCRIPTOR_PATH = Objects.requireNonNull( ProtoByteUtilsTest.class.getResource( @@ -84,6 +104,14 @@ public void testProtoSchemaStringToBeamSchema() { Assert.assertEquals(schema.getFieldNames(), SCHEMA.getFieldNames()); } + @Test + public void testProtoSchemaWitPackageStringToBeamSchema() { + Schema schema = + ProtoByteUtils.getBeamSchemaFromProtoSchema( + PROTO_STRING_PACKAGE_SCHEMA, "com.test.proto.MyMessage"); + Assert.assertEquals(schema.getFieldNames(), SCHEMA.getFieldNames()); + } + @Test public void testProtoBytesToRowFunctionGenerateSerializableFunction() { SerializableFunction protoBytesToRowFunction = @@ -111,6 +139,22 @@ public void testProtoBytesToRowFunctionReturnsRowFailure() { protoBytesToRowFunction.apply(inputBytes); } + @Test + public void testProtoBytesToRowFunctionReturnsRowSuccess() { + // Create a proto bytes to row function + SerializableFunction protoBytesToRowFunction = + ProtoByteUtils.getProtoBytesToRowFunction(DESCRIPTOR_PATH, MESSAGE_NAME); + + byte[] byteArray = { + 8, -46, 9, 18, 3, 68, 111, 101, 34, 35, 10, 7, 115, 101, 97, 116, 116, 108, 101, 18, 11, 102, + 97, 107, 101, 32, 115, 116, 114, 101, 101, 116, 26, 2, 119, 97, 34, 7, 84, 79, 45, 49, 50, 51, + 52 + }; + + Row row = protoBytesToRowFunction.apply(byteArray); + Assert.assertEquals("Doe", row.getValue("name")); + } + @Test public void testRowToProtoFunction() { Row row = @@ -144,4 +188,32 @@ public void testRowToProtoSchemaFunction() { Assert.assertNotNull( ProtoByteUtils.getRowToProtoBytesFromSchema(PROTO_STRING_SCHEMA, "MyMessage").apply(row)); } + + @Test + public void testRowToProtoSchemaWithPackageFunction() { + Row row = + Row.withSchema(SCHEMA) + .withFieldValue("id", 1234) + .withFieldValue("name", "Doe") + .withFieldValue("active", false) + .withFieldValue("address.city", "seattle") + .withFieldValue("address.street", "fake street") + .withFieldValue("address.zip_code", "TO-1234") + .withFieldValue("address.state", "wa") + .build(); + + byte[] byteArray = { + 8, -46, 9, 18, 3, 68, 111, 101, 34, 35, 10, 7, 115, 101, 
97, 116, 116, 108, 101, 18, 11, 102, + 97, 107, 101, 32, 115, 116, 114, 101, 101, 116, 26, 2, 119, 97, 34, 7, 84, 79, 45, 49, 50, 51, + 52 + }; + + byte[] resultBytes = + ProtoByteUtils.getRowToProtoBytesFromSchema( + PROTO_STRING_PACKAGE_SCHEMA, "com.test.proto.MyMessage") + .apply(row); + + Assert.assertNotNull(resultBytes); + Assert.assertArrayEquals(byteArray, resultBytes); + } } diff --git a/sdks/java/extensions/python/build.gradle b/sdks/java/extensions/python/build.gradle index 40729d5f9cab..a83afac68815 100644 --- a/sdks/java/extensions/python/build.gradle +++ b/sdks/java/extensions/python/build.gradle @@ -30,7 +30,7 @@ dependencies { implementation project(path: ":model:pipeline", configuration: "shadow") implementation project(path: ":runners:core-construction-java") implementation project(path: ":sdks:java:core", configuration: "shadow") - implementation project(path: ":sdks:java:transform-service:launcher", configuration: "shadow") + implementation project(path: ":sdks:java:transform-service:launcher") testImplementation library.java.junit testImplementation library.java.hamcrest testImplementation project(":runners:core-construction-java").sourceSets.test.output diff --git a/sdks/java/extensions/sql/zetasql/src/main/java/org/apache/beam/sdk/extensions/sql/zetasql/BeamZetaSqlCalcRel.java b/sdks/java/extensions/sql/zetasql/src/main/java/org/apache/beam/sdk/extensions/sql/zetasql/BeamZetaSqlCalcRel.java index ad856c1c3a7d..d60ebe46b370 100644 --- a/sdks/java/extensions/sql/zetasql/src/main/java/org/apache/beam/sdk/extensions/sql/zetasql/BeamZetaSqlCalcRel.java +++ b/sdks/java/extensions/sql/zetasql/src/main/java/org/apache/beam/sdk/extensions/sql/zetasql/BeamZetaSqlCalcRel.java @@ -26,7 +26,6 @@ import com.google.zetasql.Value; import edu.umd.cs.findbugs.annotations.SuppressFBWarnings; import java.util.ArrayDeque; -import java.util.Collection; import java.util.HashMap; import java.util.List; import java.util.Map; @@ -49,7 +48,6 @@ import org.apache.beam.sdk.transforms.PTransform; import org.apache.beam.sdk.transforms.ParDo; import org.apache.beam.sdk.transforms.windowing.BoundedWindow; -import org.apache.beam.sdk.transforms.windowing.PaneInfo; import org.apache.beam.sdk.values.PCollection; import org.apache.beam.sdk.values.PCollectionList; import org.apache.beam.sdk.values.PCollectionTuple; @@ -363,16 +361,6 @@ public void output(Row output) { public void outputWithTimestamp(Row output, Instant timestamp) { c.output(tag, output, timestamp, w); } - - @Override - public void outputWindowedValue( - Row output, - Instant timestamp, - Collection windows, - PaneInfo paneInfo) { - throw new UnsupportedOperationException( - "outputWindowedValue not supported in finish bundle here"); - } } private static RuntimeException extractException(Throwable e) { diff --git a/sdks/java/fn-execution/build.gradle b/sdks/java/fn-execution/build.gradle deleted file mode 100644 index d1b5c713d092..000000000000 --- a/sdks/java/fn-execution/build.gradle +++ /dev/null @@ -1,43 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * License); you may not use this file except in compliance - * with the License. 
You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an AS IS BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -plugins { id 'org.apache.beam.module' } -applyJavaNature( - automaticModuleName: 'org.apache.beam.sdk.fn', -) - -description = "Apache Beam :: SDKs :: Java :: Fn Execution" -ext.summary = """Contains code shared across the Beam Java SDK Harness and Java Runners to execute using -the Beam Portability Framework.""" - -dependencies { - implementation project(path: ":model:pipeline", configuration: "shadow") - implementation project(path: ":model:fn-execution", configuration: "shadow") - implementation project(path: ":sdks:java:core", configuration: "shadow") - implementation library.java.vendored_grpc_1_60_1 - implementation library.java.vendored_guava_32_1_2_jre - implementation library.java.slf4j_api - implementation library.java.joda_time - provided library.java.junit - testImplementation project(path: ":sdks:java:core", configuration: "shadowTest") - testImplementation library.java.junit - testImplementation library.java.mockito_core - testImplementation library.java.commons_lang3 - testImplementation "com.github.stefanbirkner:system-rules:1.19.0" - testRuntimeOnly library.java.slf4j_jdk14 -} diff --git a/sdks/java/harness/build.gradle b/sdks/java/harness/build.gradle index 505cbb38ab13..3c50f3c8edf2 100644 --- a/sdks/java/harness/build.gradle +++ b/sdks/java/harness/build.gradle @@ -29,7 +29,7 @@ dependencies { // :sdks:java:core and transitive dependencies provided project(path: ":model:pipeline", configuration: "shadow") provided project(path: ":sdks:java:core", configuration: "shadow") - provided project(path: ":sdks:java:transform-service:launcher", configuration: "shadow") + provided project(path: ":sdks:java:transform-service:launcher") provided library.java.avro provided library.java.joda_time provided library.java.slf4j_api @@ -87,12 +87,10 @@ dependencies { implementation project(path: ":model:fn-execution", configuration: "shadow") implementation project(":runners:core-construction-java") implementation project(":runners:core-java") - implementation project(":sdks:java:fn-execution") permitUnusedDeclared project(path: ":sdks:java:transform-service:launcher") testImplementation library.java.junit testImplementation library.java.mockito_core - shadowTestRuntimeClasspath project(path: ":sdks:java:core", configuration: "shadowTest") - testImplementation project(path: ":sdks:java:fn-execution", configuration: "testRuntimeMigration") + shadowTest project(path: ":sdks:java:core", configuration: "shadowTest") shadowTestRuntimeClasspath library.java.slf4j_jdk14 permitUnusedDeclared library.java.avro } diff --git a/sdks/java/harness/jmh/build.gradle b/sdks/java/harness/jmh/build.gradle index 90e752947172..d3b33929d58c 100644 --- a/sdks/java/harness/jmh/build.gradle +++ b/sdks/java/harness/jmh/build.gradle @@ -35,12 +35,15 @@ dependencies { implementation project(path: ":sdks:java:harness", configuration: "shadow") implementation project(path: ":runners:java-fn-execution") implementation project(path: ":model:pipeline", configuration: "shadow") + implementation project(path: ":model:fn-execution", configuration: "shadow") implementation library.java.vendored_grpc_1_60_1 
implementation library.java.vendored_guava_32_1_2_jre implementation library.java.slf4j_api implementation library.java.joda_time + implementation library.java.junit runtimeOnly library.java.slf4j_jdk14 jammAgent library.java.jamm + testImplementation library.java.junit } jmh { diff --git a/sdks/java/harness/src/main/java/org/apache/beam/fn/harness/BeamFnDataWriteRunner.java b/sdks/java/harness/src/main/java/org/apache/beam/fn/harness/BeamFnDataWriteRunner.java index c47d86cd645e..310e9ef50834 100644 --- a/sdks/java/harness/src/main/java/org/apache/beam/fn/harness/BeamFnDataWriteRunner.java +++ b/sdks/java/harness/src/main/java/org/apache/beam/fn/harness/BeamFnDataWriteRunner.java @@ -23,7 +23,6 @@ import java.io.IOException; import java.util.Map; import java.util.function.Supplier; -import org.apache.beam.fn.harness.PTransformRunnerFactory.Context; import org.apache.beam.fn.harness.state.BeamFnStateClient; import org.apache.beam.fn.harness.state.StateBackedIterable.StateBackedIterableTranslationContext; import org.apache.beam.model.fnexecution.v1.BeamFnApi; diff --git a/sdks/java/io/google-cloud-platform/src/main/java/org/apache/beam/sdk/io/gcp/bigquery/BigQueryIO.java b/sdks/java/io/google-cloud-platform/src/main/java/org/apache/beam/sdk/io/gcp/bigquery/BigQueryIO.java index 3d38ef0e83ff..cd62c5810d81 100644 --- a/sdks/java/io/google-cloud-platform/src/main/java/org/apache/beam/sdk/io/gcp/bigquery/BigQueryIO.java +++ b/sdks/java/io/google-cloud-platform/src/main/java/org/apache/beam/sdk/io/gcp/bigquery/BigQueryIO.java @@ -96,7 +96,7 @@ import org.apache.beam.sdk.io.gcp.bigquery.BigQueryServices.StorageClient; import org.apache.beam.sdk.io.gcp.bigquery.BigQuerySourceBase.ExtractResult; import org.apache.beam.sdk.io.gcp.bigquery.DynamicDestinationsHelpers.ConstantSchemaDestinations; -import org.apache.beam.sdk.io.gcp.bigquery.DynamicDestinationsHelpers.ConstantTimePartitioningDestinations; +import org.apache.beam.sdk.io.gcp.bigquery.DynamicDestinationsHelpers.ConstantTimePartitioningClusteringDestinations; import org.apache.beam.sdk.io.gcp.bigquery.DynamicDestinationsHelpers.SchemaFromViewDestinations; import org.apache.beam.sdk.io.gcp.bigquery.DynamicDestinationsHelpers.TableFunctionDestinations; import org.apache.beam.sdk.io.gcp.bigquery.PassThroughThenCleanup.CleanupOperation; @@ -2744,8 +2744,7 @@ public Write withJsonTimePartitioning(ValueProvider partitioning) { } /** - * Specifies the clustering fields to use when writing to a single output table. Can only be - * used when {@link#withTimePartitioning(TimePartitioning)} is set. If {@link + * Specifies the clustering fields to use when writing to a single output table. If {@link * #to(SerializableFunction)} or {@link #to(DynamicDestinations)} is used to write to dynamic * tables, the fields here will be ignored; call {@link #withClustering()} instead. */ @@ -3357,9 +3356,10 @@ && getStorageApiTriggeringFrequency(bqOptions) != null) { } // Wrap with a DynamicDestinations class that will provide the proper TimePartitioning. 
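Together with the ConstantTimePartitioningClusteringDestinations change further down, dropping the old javadoc restriction means a single-table write can now set clustering fields without any time partitioning. A hedged configuration sketch (project, dataset, table, and field names are made up; a schema supplied via withSchema(...) is still needed when the table has to be created):

import com.google.api.services.bigquery.model.Clustering;
import com.google.api.services.bigquery.model.TableRow;
import java.util.Arrays;
import org.apache.beam.sdk.io.gcp.bigquery.BigQueryIO;
import org.apache.beam.sdk.io.gcp.bigquery.BigQueryIO.Write.CreateDisposition;
import org.apache.beam.sdk.io.gcp.bigquery.BigQueryIO.Write.WriteDisposition;

public class ClusteredWriteSketch {
  static BigQueryIO.Write<TableRow> clusteredWrite() {
    // Clustering only; no withTimePartitioning(...) call is required any more.
    return BigQueryIO.writeTableRows()
        .to("my-project:my_dataset.my_table")
        .withClustering(new Clustering().setFields(Arrays.asList("country", "user_id")))
        .withCreateDisposition(CreateDisposition.CREATE_IF_NEEDED)
        .withWriteDisposition(WriteDisposition.WRITE_APPEND);
  }
}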
- if (getJsonTimePartitioning() != null) { + if (getJsonTimePartitioning() != null + || Optional.ofNullable(getClustering()).map(Clustering::getFields).isPresent()) { dynamicDestinations = - new ConstantTimePartitioningDestinations<>( + new ConstantTimePartitioningClusteringDestinations<>( (DynamicDestinations) dynamicDestinations, getJsonTimePartitioning(), StaticValueProvider.of(BigQueryHelpers.toJsonString(getClustering()))); diff --git a/sdks/java/io/google-cloud-platform/src/main/java/org/apache/beam/sdk/io/gcp/bigquery/BigQuerySinkMetrics.java b/sdks/java/io/google-cloud-platform/src/main/java/org/apache/beam/sdk/io/gcp/bigquery/BigQuerySinkMetrics.java index c0f470b39214..34e3b704b4f3 100644 --- a/sdks/java/io/google-cloud-platform/src/main/java/org/apache/beam/sdk/io/gcp/bigquery/BigQuerySinkMetrics.java +++ b/sdks/java/io/google-cloud-platform/src/main/java/org/apache/beam/sdk/io/gcp/bigquery/BigQuerySinkMetrics.java @@ -22,6 +22,7 @@ import java.time.Instant; import java.util.List; import java.util.NavigableMap; +import java.util.Optional; import java.util.TreeMap; import javax.annotation.Nonnull; import javax.annotation.Nullable; @@ -40,10 +41,10 @@ * *

In general, metrics will be in the namespace 'BigQuerySink' and have their name formatted as: * - *

'{baseName}-{metricLabelKey1}:{metricLabelVal1};...{metricLabelKeyN}:{metricLabelValN};' + *

'{baseName}*{metricLabelKey1}:{metricLabelVal1};...{metricLabelKeyN}:{metricLabelValN};' */ public class BigQuerySinkMetrics { - private static Boolean supportMetricsDeletion = false; + private static boolean supportMetricsDeletion = false; public static final String METRICS_NAMESPACE = "BigQuerySink"; @@ -78,10 +79,11 @@ enum RowStatus { private static final String RPC_METHOD = "rpc_method"; private static final String ROW_STATUS = "row_status"; - // Delimiters + // Delimiters. Avoid using delimiters that can also be used in a BigQuery table name. + // ref: https://cloud.google.com/bigquery/docs/tables#table_naming private static final char LABEL_DELIMITER = ';'; private static final char METRIC_KV_DELIMITER = ':'; - private static final char METRIC_NAME_DELIMITER = '-'; + private static final char METRIC_NAME_DELIMITER = '*'; @AutoValue public abstract static class ParsedMetricName { @@ -119,38 +121,41 @@ private static String createLabeledMetricName( * Parse a 'metric name' String that was created with 'createLabeledMetricName'. The input string * should be formatted as. * - *

'{baseName}-{metricLabelKey1}:{metricLabelVal1};...{metricLabelKeyN}:{metricLabelValN};' + *

'{baseName}*{metricLabelKey1}:{metricLabelVal1};...{metricLabelKeyN}:{metricLabelValN};' * * @param metricName * @return Returns a ParsedMetricName object if the input string is properly formatted. If the - * input string is empty or malformed, returns null. + * input string is empty or malformed, returns an empty value. */ - public static @Nullable ParsedMetricName parseMetricName(String metricName) { + public static Optional parseMetricName(String metricName) { if (metricName.isEmpty()) { - return null; + return Optional.empty(); } - List metricNameSplit = Splitter.on(METRIC_NAME_DELIMITER).splitToList(metricName); + List metricNameSplit = + Splitter.on(METRIC_NAME_DELIMITER).limit(2).splitToList(metricName); ImmutableMap.Builder metricLabelsBuilder = ImmutableMap.builder(); - if (metricNameSplit.size() == 1) { - return ParsedMetricName.create(metricNameSplit.get(0)); + if (metricNameSplit.size() == 0) { + return Optional.empty(); } - if (metricNameSplit.size() != 2) { - return null; + if (metricNameSplit.size() == 1) { + return Optional.of(ParsedMetricName.create(metricNameSplit.get(0))); } + // metrcNameSplit is assumed to be size two. List labels = Splitter.on(LABEL_DELIMITER).splitToList(metricNameSplit.get(1)); for (String label : labels) { - List kv = Splitter.on(METRIC_KV_DELIMITER).splitToList(label); + List kv = Splitter.on(METRIC_KV_DELIMITER).limit(2).splitToList(label); if (kv.size() != 2) { continue; } metricLabelsBuilder.put(kv.get(0), kv.get(1)); } - return ParsedMetricName.create(metricNameSplit.get(0), metricLabelsBuilder.build()); + return Optional.of( + ParsedMetricName.create(metricNameSplit.get(0), metricLabelsBuilder.build())); } /** @@ -321,7 +326,7 @@ public static void reportFailedRPCMetrics( updateRpcLatencyMetric(c, method); } - public static void setSupportMetricsDeletion(Boolean supportMetricsDeletion) { + public static void setSupportMetricsDeletion(boolean supportMetricsDeletion) { BigQuerySinkMetrics.supportMetricsDeletion = supportMetricsDeletion; } } diff --git a/sdks/java/io/google-cloud-platform/src/main/java/org/apache/beam/sdk/io/gcp/bigquery/CreateTableHelpers.java b/sdks/java/io/google-cloud-platform/src/main/java/org/apache/beam/sdk/io/gcp/bigquery/CreateTableHelpers.java index 6edd3f71cc71..7a94657107ec 100644 --- a/sdks/java/io/google-cloud-platform/src/main/java/org/apache/beam/sdk/io/gcp/bigquery/CreateTableHelpers.java +++ b/sdks/java/io/google-cloud-platform/src/main/java/org/apache/beam/sdk/io/gcp/bigquery/CreateTableHelpers.java @@ -179,11 +179,13 @@ private static void tryCreateTable( TimePartitioning timePartitioning = tableDestination.getTimePartitioning(); if (timePartitioning != null) { table.setTimePartitioning(timePartitioning); - Clustering clustering = tableDestination.getClustering(); - if (clustering != null) { - table.setClustering(clustering); - } } + + Clustering clustering = tableDestination.getClustering(); + if (clustering != null) { + table.setClustering(clustering); + } + if (kmsKey != null) { table.setEncryptionConfiguration(new EncryptionConfiguration().setKmsKeyName(kmsKey)); } diff --git a/sdks/java/io/google-cloud-platform/src/main/java/org/apache/beam/sdk/io/gcp/bigquery/DynamicDestinationsHelpers.java b/sdks/java/io/google-cloud-platform/src/main/java/org/apache/beam/sdk/io/gcp/bigquery/DynamicDestinationsHelpers.java index 62355fd9417d..1f042a81eb9d 100644 --- a/sdks/java/io/google-cloud-platform/src/main/java/org/apache/beam/sdk/io/gcp/bigquery/DynamicDestinationsHelpers.java +++ 
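With '*' as the base-name delimiter and the Optional-returning parseMetricName shown above, a labeled metric name decomposes as in this sketch (the base name and label values are illustrative; the label keys match the constants in this class):

import java.util.Optional;
import org.apache.beam.sdk.io.gcp.bigquery.BigQuerySinkMetrics;

public class ParseMetricNameSketch {
  public static void main(String[] args) {
    // Format: '{baseName}*{key1}:{val1};...{keyN}:{valN};'
    String name = "RpcRequests*rpc_method:AppendRows;row_status:SUCCESSFUL;";
    Optional<BigQuerySinkMetrics.ParsedMetricName> parsed =
        BigQuerySinkMetrics.parseMetricName(name);
    // Expected: base name "RpcRequests" with labels {rpc_method=AppendRows, row_status=SUCCESSFUL};
    // an empty or malformed name now yields Optional.empty() instead of null.
    System.out.println(parsed.isPresent()); // prints true
  }
}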
b/sdks/java/io/google-cloud-platform/src/main/java/org/apache/beam/sdk/io/gcp/bigquery/DynamicDestinationsHelpers.java @@ -272,23 +272,28 @@ public String toString() { } } - static class ConstantTimePartitioningDestinations + static class ConstantTimePartitioningClusteringDestinations extends DelegatingDynamicDestinations { - private final ValueProvider jsonTimePartitioning; + private final @Nullable ValueProvider jsonTimePartitioning; private final @Nullable ValueProvider jsonClustering; - ConstantTimePartitioningDestinations( + ConstantTimePartitioningClusteringDestinations( DynamicDestinations inner, ValueProvider jsonTimePartitioning, ValueProvider jsonClustering) { super(inner); - Preconditions.checkArgumentNotNull( - jsonTimePartitioning, "jsonTimePartitioning provider can not be null"); - if (jsonTimePartitioning.isAccessible()) { - Preconditions.checkArgumentNotNull( - jsonTimePartitioning.get(), "jsonTimePartitioning can not be null"); - } + + checkArgument( + (jsonTimePartitioning != null + && jsonTimePartitioning.isAccessible() + && jsonTimePartitioning.get() != null) + || (jsonClustering != null + && jsonClustering.isAccessible() + && jsonClustering.get() != null), + "at least one of jsonTimePartitioning or jsonClustering must be non-null, accessible " + + "and present"); + this.jsonTimePartitioning = jsonTimePartitioning; this.jsonClustering = jsonClustering; } @@ -296,13 +301,12 @@ static class ConstantTimePartitioningDestinations @Override public TableDestination getDestination(@Nullable ValueInSingleWindow element) { TableDestination destination = super.getDestination(element); - String partitioning = this.jsonTimePartitioning.get(); - checkArgument(partitioning != null, "jsonTimePartitioning can not be null"); + String partitioning = + Optional.ofNullable(jsonTimePartitioning).map(ValueProvider::get).orElse(null); + String clustering = Optional.ofNullable(jsonClustering).map(ValueProvider::get).orElse(null); + return new TableDestination( - destination.getTableSpec(), - destination.getTableDescription(), - partitioning, - Optional.ofNullable(jsonClustering).map(ValueProvider::get).orElse(null)); + destination.getTableSpec(), destination.getTableDescription(), partitioning, clustering); } @Override @@ -316,10 +320,10 @@ public Coder getDestinationCoder() { @Override public String toString() { - MoreObjects.ToStringHelper helper = - MoreObjects.toStringHelper(this) - .add("inner", inner) - .add("jsonTimePartitioning", jsonTimePartitioning); + MoreObjects.ToStringHelper helper = MoreObjects.toStringHelper(this).add("inner", inner); + if (jsonTimePartitioning != null) { + helper.add("jsonTimePartitioning", jsonTimePartitioning); + } if (jsonClustering != null) { helper.add("jsonClustering", jsonClustering); } diff --git a/sdks/java/io/google-cloud-platform/src/main/java/org/apache/beam/sdk/io/gcp/bigquery/StorageApiWriteUnshardedRecords.java b/sdks/java/io/google-cloud-platform/src/main/java/org/apache/beam/sdk/io/gcp/bigquery/StorageApiWriteUnshardedRecords.java index 3c6c73dd0219..846e7e3bddcb 100644 --- a/sdks/java/io/google-cloud-platform/src/main/java/org/apache/beam/sdk/io/gcp/bigquery/StorageApiWriteUnshardedRecords.java +++ b/sdks/java/io/google-cloud-platform/src/main/java/org/apache/beam/sdk/io/gcp/bigquery/StorageApiWriteUnshardedRecords.java @@ -37,7 +37,6 @@ import java.io.IOException; import java.time.Instant; import java.util.Arrays; -import java.util.Collection; import java.util.List; import java.util.Map; import java.util.Objects; @@ -70,7 +69,6 @@ import 
org.apache.beam.sdk.transforms.Reshuffle; import org.apache.beam.sdk.transforms.windowing.BoundedWindow; import org.apache.beam.sdk.transforms.windowing.GlobalWindow; -import org.apache.beam.sdk.transforms.windowing.PaneInfo; import org.apache.beam.sdk.util.Preconditions; import org.apache.beam.sdk.values.KV; import org.apache.beam.sdk.values.PCollection; @@ -1095,15 +1093,6 @@ public void outputWithTimestamp( BigQueryStorageApiInsertError output, org.joda.time.Instant timestamp) { context.output(failedRowsTag, output, timestamp, GlobalWindow.INSTANCE); } - - @Override - public void outputWindowedValue( - BigQueryStorageApiInsertError output, - org.joda.time.Instant timestamp, - Collection windows, - PaneInfo paneInfo) { - throw new UnsupportedOperationException("outputWindowedValue not supported"); - } }; @Nullable OutputReceiver successfulRowsReceiver = null; if (successfulRowsTag != null) { @@ -1118,15 +1107,6 @@ public void output(TableRow output) { public void outputWithTimestamp(TableRow output, org.joda.time.Instant timestamp) { context.output(successfulRowsTag, output, timestamp, GlobalWindow.INSTANCE); } - - @Override - public void outputWindowedValue( - TableRow output, - org.joda.time.Instant timestamp, - Collection windows, - PaneInfo paneInfo) { - throw new UnsupportedOperationException("outputWindowedValue not supported"); - } }; } diff --git a/sdks/java/io/google-cloud-platform/src/main/java/org/apache/beam/sdk/io/gcp/bigquery/UpdateSchemaDestination.java b/sdks/java/io/google-cloud-platform/src/main/java/org/apache/beam/sdk/io/gcp/bigquery/UpdateSchemaDestination.java index 51e61fe41953..65bb3bf11b1b 100644 --- a/sdks/java/io/google-cloud-platform/src/main/java/org/apache/beam/sdk/io/gcp/bigquery/UpdateSchemaDestination.java +++ b/sdks/java/io/google-cloud-platform/src/main/java/org/apache/beam/sdk/io/gcp/bigquery/UpdateSchemaDestination.java @@ -288,11 +288,12 @@ private BigQueryHelpers.PendingJob startZeroLoadJob( } if (timePartitioning != null) { loadConfig.setTimePartitioning(timePartitioning); - // only set clustering if timePartitioning is set - if (clustering != null) { - loadConfig.setClustering(clustering); - } } + + if (clustering != null) { + loadConfig.setClustering(clustering); + } + if (kmsKey != null) { loadConfig.setDestinationEncryptionConfiguration( new EncryptionConfiguration().setKmsKeyName(kmsKey)); diff --git a/sdks/java/io/google-cloud-platform/src/main/java/org/apache/beam/sdk/io/gcp/bigquery/providers/BigQueryStorageWriteApiSchemaTransformProvider.java b/sdks/java/io/google-cloud-platform/src/main/java/org/apache/beam/sdk/io/gcp/bigquery/providers/BigQueryStorageWriteApiSchemaTransformProvider.java index d0951cdad1a6..ca0d0a2784bd 100644 --- a/sdks/java/io/google-cloud-platform/src/main/java/org/apache/beam/sdk/io/gcp/bigquery/providers/BigQueryStorageWriteApiSchemaTransformProvider.java +++ b/sdks/java/io/google-cloud-platform/src/main/java/org/apache/beam/sdk/io/gcp/bigquery/providers/BigQueryStorageWriteApiSchemaTransformProvider.java @@ -383,13 +383,6 @@ public PCollectionRowTuple expand(PCollectionRowTuple input) { Boolean autoSharding = configuration.getAutoSharding(); int numStreams = configuration.getNumStreams() == null ? 0 : configuration.getNumStreams(); - // TODO(https://github.com/apache/beam/issues/30058): remove once Dataflow supports multiple - // DoFn's per fused step. 
- if (numStreams < 1) { - throw new IllegalStateException( - "numStreams must be set to a positive integer when input data is unbounded."); - } - boolean useAtLeastOnceSemantics = configuration.getUseAtLeastOnceSemantics() != null && configuration.getUseAtLeastOnceSemantics(); diff --git a/sdks/java/io/google-cloud-platform/src/main/java/org/apache/beam/sdk/io/gcp/bigtable/BigtableServiceImpl.java b/sdks/java/io/google-cloud-platform/src/main/java/org/apache/beam/sdk/io/gcp/bigtable/BigtableServiceImpl.java index 7537a16a188b..d6208be1bf94 100644 --- a/sdks/java/io/google-cloud-platform/src/main/java/org/apache/beam/sdk/io/gcp/bigtable/BigtableServiceImpl.java +++ b/sdks/java/io/google-cloud-platform/src/main/java/org/apache/beam/sdk/io/gcp/bigtable/BigtableServiceImpl.java @@ -387,6 +387,11 @@ public void onResponse(Row response) { currentByteSize += response.getSerializedSize(); rows.add(response); if (currentByteSize > maxSegmentByteSize) { + LOG.debug( + "Reached maxSegmentByteSize, cancelling the stream. currentByteSize is {}, maxSegmentByteSize is {}, read rows {}", + currentByteSize, + maxSegmentByteSize, + rows.size()); byteLimitReached = true; controller.cancel(); return; @@ -395,14 +400,25 @@ public void onResponse(Row response) { @Override public void onError(Throwable t) { - future.setException(t); + if (byteLimitReached) { + // When the byte limit is reached we cancel the stream in onResponse. + // In this case we don't want to fail the request with cancellation + // exception. Instead, we construct the next request. + onComplete(); + } else { + future.setException(t); + } } @Override public void onComplete() { ReadRowsRequest nextNextRequest = null; - // When requested rows < limit, the current request will be the last + // Only schedule the next segment fetch when there's a possibility of more + // data to read. We know there might be more data when the current segment + // ended with the artificial byte limit or the row limit. + // If the RPC ended without hitting the byte limit or row limit, we know + // there's no more data to read and nextNextRequest would be null. 
if (byteLimitReached || rows.size() == nextRequest.getRowsLimit()) { nextNextRequest = truncateRequest(nextRequest, rows.get(rows.size() - 1).getKey()); diff --git a/sdks/java/io/google-cloud-platform/src/main/java/org/apache/beam/sdk/io/gcp/pubsublite/PubsubLiteReadSchemaTransformProvider.java b/sdks/java/io/google-cloud-platform/src/main/java/org/apache/beam/sdk/io/gcp/pubsublite/PubsubLiteReadSchemaTransformProvider.java index 98c5f8a6b388..8afe730f32ce 100644 --- a/sdks/java/io/google-cloud-platform/src/main/java/org/apache/beam/sdk/io/gcp/pubsublite/PubsubLiteReadSchemaTransformProvider.java +++ b/sdks/java/io/google-cloud-platform/src/main/java/org/apache/beam/sdk/io/gcp/pubsublite/PubsubLiteReadSchemaTransformProvider.java @@ -38,6 +38,7 @@ import java.util.function.Consumer; import org.apache.beam.sdk.extensions.avro.schemas.utils.AvroUtils; import org.apache.beam.sdk.extensions.gcp.options.GcpOptions; +import org.apache.beam.sdk.extensions.protobuf.ProtoByteUtils; import org.apache.beam.sdk.io.gcp.pubsublite.internal.Uuid; import org.apache.beam.sdk.metrics.Counter; import org.apache.beam.sdk.metrics.Metrics; @@ -77,7 +78,7 @@ public class PubsubLiteReadSchemaTransformProvider private static final Logger LOG = LoggerFactory.getLogger(PubsubLiteReadSchemaTransformProvider.class); - public static final String VALID_FORMATS_STR = "RAW,AVRO,JSON"; + public static final String VALID_FORMATS_STR = "RAW,AVRO,JSON,PROTO"; public static final Set VALID_DATA_FORMATS = Sets.newHashSet(VALID_FORMATS_STR.split(",")); @@ -207,26 +208,39 @@ public void finish(FinishBundleContext c) { Schema beamSchema; if (format != null && format.equals("RAW")) { - if (inputSchema != null) { - throw new IllegalArgumentException( - "To read from PubSubLite in RAW format, you can't provide a schema."); - } + beamSchema = Schema.builder().addField("payload", Schema.FieldType.BYTES).build(); valueMapper = getRawBytesToRowFunction(beamSchema); + } else if (format != null && format.equals("PROTO")) { + String fileDescriptorPath = configuration.getFileDescriptorPath(); + String messageName = configuration.getMessageName(); + + if (fileDescriptorPath != null && messageName != null) { + beamSchema = ProtoByteUtils.getBeamSchemaFromProto(fileDescriptorPath, messageName); + valueMapper = ProtoByteUtils.getProtoBytesToRowFunction(fileDescriptorPath, messageName); + } else if (inputSchema != null && messageName != null) { + beamSchema = ProtoByteUtils.getBeamSchemaFromProtoSchema(inputSchema, messageName); + valueMapper = ProtoByteUtils.getProtoBytesToRowFromSchemaFunction(inputSchema, messageName); + } else { + throw new IllegalArgumentException( + "To read from PubSubLite in PROTO format, either descriptorPath or schema must be provided."); + } + } else { - if (inputSchema == null) { + if (inputSchema != null) { + beamSchema = + Objects.equals(configuration.getFormat(), "JSON") + ? JsonUtils.beamSchemaFromJsonSchema(inputSchema) + : AvroUtils.toBeamSchema(new org.apache.avro.Schema.Parser().parse(inputSchema)); + valueMapper = + Objects.equals(configuration.getFormat(), "JSON") + ? JsonUtils.getJsonBytesToRowFunction(beamSchema) + : AvroUtils.getAvroBytesToRowFunction(beamSchema); + } else { throw new IllegalArgumentException( - "To read from PubSubLite in JSON or AVRO format, you must provide a schema."); + "To read from Pubsub Lite in JSON or AVRO format, you must provide a schema."); } - beamSchema = - Objects.equals(configuration.getFormat(), "JSON") - ? 
JsonUtils.beamSchemaFromJsonSchema(inputSchema) - : AvroUtils.toBeamSchema(new org.apache.avro.Schema.Parser().parse(inputSchema)); - valueMapper = - Objects.equals(configuration.getFormat(), "JSON") - ? JsonUtils.getJsonBytesToRowFunction(beamSchema) - : AvroUtils.getAvroBytesToRowFunction(beamSchema); } return new SchemaTransform() { @Override @@ -404,13 +418,33 @@ public Uuid apply(SequencedMessage input) { @AutoValue @DefaultSchema(AutoValueSchema.class) public abstract static class PubsubLiteReadSchemaTransformConfiguration { + + public void validate() { + final String dataFormat = this.getFormat(); + assert dataFormat == null || VALID_DATA_FORMATS.contains(dataFormat) + : "Valid data formats are " + VALID_DATA_FORMATS; + + final String inputSchema = this.getSchema(); + final String messageName = this.getMessageName(); + + if (dataFormat != null && dataFormat.equals("RAW")) { + assert inputSchema == null + : "To read from Pubsub Lite in RAW format, you can't provide a schema."; + } + + if (dataFormat != null && dataFormat.equals("PROTO")) { + assert messageName != null + : "To read from Pubsub Lite in PROTO format, messageName must be provided."; + } + } + @SchemaFieldDescription( "The encoding format for the data stored in Pubsub Lite. Valid options are: " + VALID_FORMATS_STR) public abstract String getFormat(); @SchemaFieldDescription( - "The schema in which the data is encoded in the Kafka topic. " + "The schema in which the data is encoded in the Pubsub Lite topic. " + "For AVRO data, this is a schema defined with AVRO schema syntax " + "(https://avro.apache.org/docs/1.10.2/spec.html#schemas). " + "For JSON data, this is a schema defined with JSON-schema syntax (https://json-schema.org/).") @@ -459,6 +493,18 @@ public abstract static class PubsubLiteReadSchemaTransformConfiguration { + "case, deduplication of the stream will be strictly best effort.") public abstract @Nullable String getAttributeId(); + @SchemaFieldDescription( + "The path to the Protocol Buffer File Descriptor Set file. 
This file is used for schema" + + " definition and message serialization.") + @Nullable + public abstract String getFileDescriptorPath(); + + @SchemaFieldDescription( + "The name of the Protocol Buffer message to be used for schema" + + " extraction and data conversion.") + @Nullable + public abstract String getMessageName(); + public static Builder builder() { return new AutoValue_PubsubLiteReadSchemaTransformProvider_PubsubLiteReadSchemaTransformConfiguration .Builder(); @@ -486,6 +532,12 @@ public abstract static class Builder { @SuppressWarnings("unused") public abstract Builder setAttributeId(String attributeId); + @SuppressWarnings("unused") + public abstract Builder setFileDescriptorPath(String fileDescriptorPath); + + @SuppressWarnings("unused") + public abstract Builder setMessageName(String messageName); + public abstract PubsubLiteReadSchemaTransformConfiguration build(); } } diff --git a/sdks/java/io/google-cloud-platform/src/main/java/org/apache/beam/sdk/io/gcp/pubsublite/PubsubLiteWriteSchemaTransformProvider.java b/sdks/java/io/google-cloud-platform/src/main/java/org/apache/beam/sdk/io/gcp/pubsublite/PubsubLiteWriteSchemaTransformProvider.java index c669f84f330f..8ba8176035da 100644 --- a/sdks/java/io/google-cloud-platform/src/main/java/org/apache/beam/sdk/io/gcp/pubsublite/PubsubLiteWriteSchemaTransformProvider.java +++ b/sdks/java/io/google-cloud-platform/src/main/java/org/apache/beam/sdk/io/gcp/pubsublite/PubsubLiteWriteSchemaTransformProvider.java @@ -34,6 +34,7 @@ import java.util.Objects; import java.util.Set; import org.apache.beam.sdk.extensions.avro.schemas.utils.AvroUtils; +import org.apache.beam.sdk.extensions.protobuf.ProtoByteUtils; import org.apache.beam.sdk.io.gcp.pubsublite.internal.Uuid; import org.apache.beam.sdk.metrics.Counter; import org.apache.beam.sdk.metrics.Metrics; @@ -71,7 +72,7 @@ public class PubsubLiteWriteSchemaTransformProvider extends TypedSchemaTransformProvider< PubsubLiteWriteSchemaTransformProvider.PubsubLiteWriteSchemaTransformConfiguration> { - public static final String SUPPORTED_FORMATS_STR = "RAW,JSON,AVRO"; + public static final String SUPPORTED_FORMATS_STR = "RAW,JSON,AVRO,PROTO"; public static final Set SUPPORTED_FORMATS = Sets.newHashSet(SUPPORTED_FORMATS_STR.split(",")); public static final TupleTag OUTPUT_TAG = new TupleTag() {}; @@ -211,6 +212,19 @@ public PCollectionRowTuple expand(PCollectionRowTuple input) { "The input schema must have exactly one field of type byte."); } toBytesFn = getRowToRawBytesFunction(inputSchema.getField(0).getName()); + } else if (configuration.getFormat().equals("PROTO")) { + String descriptorPath = configuration.getFileDescriptorPath(); + String schema = configuration.getSchema(); + String messageName = configuration.getMessageName(); + + if (descriptorPath != null && messageName != null) { + toBytesFn = ProtoByteUtils.getRowToProtoBytes(descriptorPath, messageName); + } else if (schema != null && messageName != null) { + toBytesFn = ProtoByteUtils.getRowToProtoBytesFromSchema(schema, messageName); + } else { + throw new IllegalArgumentException( + "At least a descriptorPath or a PROTO schema is required."); + } } else if (configuration.getFormat().equals("JSON")) { toBytesFn = JsonUtils.getRowToJsonBytesFunction(inputSchema); } else { @@ -322,6 +336,20 @@ public byte[] apply(Row input) { @AutoValue @DefaultSchema(AutoValueSchema.class) public abstract static class PubsubLiteWriteSchemaTransformConfiguration { + + public void validate() { + final String dataFormat = this.getFormat(); + final 
String inputSchema = this.getSchema(); + final String messageName = this.getMessageName(); + final String descriptorPath = this.getFileDescriptorPath(); + + if (dataFormat != null && dataFormat.equals("PROTO")) { + assert messageName != null : "Expecting messageName to be non-null."; + assert descriptorPath != null && inputSchema != null + : "You must include a descriptorPath or a PROTO schema but not both."; + } + } + @SchemaFieldDescription( "The GCP project where the Pubsub Lite reservation resides. This can be a " + "project number of a project ID.") @@ -358,6 +386,18 @@ public abstract static class PubsubLiteWriteSchemaTransformConfiguration { + "in a ReadFromPubSubLite PTransform to deduplicate messages.") public abstract @Nullable String getAttributeId(); + @SchemaFieldDescription( + "The path to the Protocol Buffer File Descriptor Set file. This file is used for schema" + + " definition and message serialization.") + public abstract @Nullable String getFileDescriptorPath(); + + @SchemaFieldDescription( + "The name of the Protocol Buffer message to be used for schema" + + " extraction and data conversion.") + public abstract @Nullable String getMessageName(); + + public abstract @Nullable String getSchema(); + public static Builder builder() { return new AutoValue_PubsubLiteWriteSchemaTransformProvider_PubsubLiteWriteSchemaTransformConfiguration .Builder(); @@ -380,6 +420,15 @@ public abstract static class Builder { @SuppressWarnings("unused") public abstract Builder setAttributeId(String attributeId); + @SuppressWarnings("unused") + public abstract Builder setFileDescriptorPath(String fileDescriptorPath); + + @SuppressWarnings("unused") + public abstract Builder setMessageName(String messageName); + + @SuppressWarnings("unused") + public abstract Builder setSchema(String schema); + public abstract PubsubLiteWriteSchemaTransformConfiguration build(); } } diff --git a/sdks/java/io/google-cloud-platform/src/main/java/org/apache/beam/sdk/io/gcp/spanner/SpannerIO.java b/sdks/java/io/google-cloud-platform/src/main/java/org/apache/beam/sdk/io/gcp/spanner/SpannerIO.java index 3128de45fde3..6db79ab69b47 100644 --- a/sdks/java/io/google-cloud-platform/src/main/java/org/apache/beam/sdk/io/gcp/spanner/SpannerIO.java +++ b/sdks/java/io/google-cloud-platform/src/main/java/org/apache/beam/sdk/io/gcp/spanner/SpannerIO.java @@ -56,7 +56,6 @@ import java.io.IOException; import java.util.ArrayList; import java.util.Arrays; -import java.util.Collection; import java.util.Comparator; import java.util.HashMap; import java.util.List; @@ -103,11 +102,9 @@ import org.apache.beam.sdk.transforms.Wait; import org.apache.beam.sdk.transforms.WithTimestamps; import org.apache.beam.sdk.transforms.display.DisplayData; -import org.apache.beam.sdk.transforms.windowing.BoundedWindow; import org.apache.beam.sdk.transforms.windowing.DefaultTrigger; import org.apache.beam.sdk.transforms.windowing.GlobalWindow; import org.apache.beam.sdk.transforms.windowing.GlobalWindows; -import org.apache.beam.sdk.transforms.windowing.PaneInfo; import org.apache.beam.sdk.transforms.windowing.Window; import org.apache.beam.sdk.util.BackOff; import org.apache.beam.sdk.util.FluentBackoff; @@ -2005,15 +2002,6 @@ public void output(Iterable output) { public void outputWithTimestamp(Iterable output, Instant timestamp) { c.output(output, timestamp, GlobalWindow.INSTANCE); } - - @Override - public void outputWindowedValue( - Iterable output, - Instant timestamp, - Collection windows, - PaneInfo paneInfo) { - throw new 
UnsupportedOperationException("outputWindowedValue not supported"); - } } } diff --git a/sdks/java/io/google-cloud-platform/src/test/java/org/apache/beam/sdk/io/gcp/bigquery/BigQueryIOWriteTest.java b/sdks/java/io/google-cloud-platform/src/test/java/org/apache/beam/sdk/io/gcp/bigquery/BigQueryIOWriteTest.java index 89cbc2cd24b8..21d3e53a0701 100644 --- a/sdks/java/io/google-cloud-platform/src/test/java/org/apache/beam/sdk/io/gcp/bigquery/BigQueryIOWriteTest.java +++ b/sdks/java/io/google-cloud-platform/src/test/java/org/apache/beam/sdk/io/gcp/bigquery/BigQueryIOWriteTest.java @@ -500,7 +500,7 @@ private void verifySideInputs() { } } - void testTimePartitioningClustering( + void testTimePartitioningAndClustering( BigQueryIO.Write.Method insertMethod, boolean enablePartitioning, boolean enableClustering) throws Exception { TableRow row1 = new TableRow().set("date", "2018-01-01").set("number", "1"); @@ -545,16 +545,8 @@ void testTimePartitioningClustering( } } - void testTimePartitioning(BigQueryIO.Write.Method insertMethod) throws Exception { - testTimePartitioningClustering(insertMethod, true, false); - } - - void testClustering(BigQueryIO.Write.Method insertMethod) throws Exception { - testTimePartitioningClustering(insertMethod, true, true); - } - - @Test - public void testTimePartitioning() throws Exception { + void testTimePartitioningAndClusteringWithAllMethods( + Boolean enablePartitioning, Boolean enableClustering) throws Exception { BigQueryIO.Write.Method method; if (useStorageApi) { method = @@ -564,15 +556,27 @@ public void testTimePartitioning() throws Exception { } else { method = Method.FILE_LOADS; } - testTimePartitioning(method); + testTimePartitioningAndClustering(method, enablePartitioning, enableClustering); } @Test - public void testClusteringStorageApi() throws Exception { - if (useStorageApi) { - testClustering( - useStorageApiApproximate ? 
Method.STORAGE_API_AT_LEAST_ONCE : Method.STORAGE_WRITE_API); - } + public void testTimePartitioningWithoutClustering() throws Exception { + testTimePartitioningAndClusteringWithAllMethods(true, false); + } + + @Test + public void testTimePartitioningWithClustering() throws Exception { + testTimePartitioningAndClusteringWithAllMethods(true, true); + } + + @Test + public void testClusteringWithoutPartitioning() throws Exception { + testTimePartitioningAndClusteringWithAllMethods(false, true); + } + + @Test + public void testNoClusteringNoPartitioning() throws Exception { + testTimePartitioningAndClusteringWithAllMethods(false, false); } @Test diff --git a/sdks/java/io/google-cloud-platform/src/test/java/org/apache/beam/sdk/io/gcp/bigquery/BigQuerySinkMetricsTest.java b/sdks/java/io/google-cloud-platform/src/test/java/org/apache/beam/sdk/io/gcp/bigquery/BigQuerySinkMetricsTest.java index 6b04ed0acc15..8b960a8c6c6d 100644 --- a/sdks/java/io/google-cloud-platform/src/test/java/org/apache/beam/sdk/io/gcp/bigquery/BigQuerySinkMetricsTest.java +++ b/sdks/java/io/google-cloud-platform/src/test/java/org/apache/beam/sdk/io/gcp/bigquery/BigQuerySinkMetricsTest.java @@ -20,14 +20,13 @@ import static org.hamcrest.MatcherAssert.assertThat; import static org.hamcrest.Matchers.containsInAnyOrder; import static org.hamcrest.Matchers.equalTo; -import static org.hamcrest.Matchers.is; -import static org.hamcrest.Matchers.nullValue; import com.google.cloud.bigquery.storage.v1.AppendRowsResponse; import com.google.cloud.bigquery.storage.v1.Exceptions; import io.grpc.Status; import java.time.Instant; import java.util.List; +import java.util.Optional; import java.util.concurrent.ConcurrentHashMap; import org.apache.beam.runners.core.metrics.CounterCell; import org.apache.beam.runners.core.metrics.MetricsContainerImpl; @@ -112,7 +111,7 @@ public void testAppendRowsRowStatusCounter() throws Exception { deletesDisabledCounter.inc(); MetricName deletesDisabledCounterName = MetricName.named( - "BigQuerySink", "RowsAppendedCount-row_status:SUCCESSFUL;rpc_status:rpcStatus;"); + "BigQuerySink", "RowsAppendedCount*row_status:SUCCESSFUL;rpc_status:rpcStatus;"); assertThat(testContainer.perWorkerCounters, IsMapContaining.hasKey(deletesDisabledCounterName)); assertThat( testContainer.perWorkerCounters.get(deletesDisabledCounterName).getCumulative(), @@ -127,7 +126,7 @@ public void testAppendRowsRowStatusCounter() throws Exception { MetricName deletesEnabledCounterName = MetricName.named( "BigQuerySink", - "RowsAppendedCount-row_status:SUCCESSFUL;rpc_status:rpcStatus;table_id:tableId;"); + "RowsAppendedCount*row_status:SUCCESSFUL;rpc_status:rpcStatus;table_id:tableId;"); assertThat(testContainer.perWorkerCounters, IsMapContaining.hasKey(deletesEnabledCounterName)); assertThat( testContainer.perWorkerCounters.get(deletesEnabledCounterName).getCumulative(), @@ -160,9 +159,10 @@ public void testThrottledTimeCounter() throws Exception { appendRowsThrottleCounter.inc(1); assertThat( appendRowsThrottleCounter.getName().getName(), - equalTo("ThrottledTime-rpc_method:APPEND_ROWS;")); + equalTo("ThrottledTime*rpc_method:APPEND_ROWS;")); + MetricName counterName = - MetricName.named("BigQuerySink", "ThrottledTime-rpc_method:APPEND_ROWS;"); + MetricName.named("BigQuerySink", "ThrottledTime*rpc_method:APPEND_ROWS;"); assertThat(testContainer.perWorkerCounters, IsMapContaining.hasKey(counterName)); assertThat(testContainer.perWorkerCounters.get(counterName).getCumulative(), equalTo(1L)); } @@ -182,9 +182,9 @@ public void 
testReportSuccessfulRpcMetrics() throws Exception { BigQuerySinkMetrics.reportSuccessfulRpcMetrics( c, BigQuerySinkMetrics.RpcMethod.APPEND_ROWS, "tableId"); MetricName counterNameDisabledDeletes = - MetricName.named("BigQuerySink", "RpcRequestsCount-rpc_method:APPEND_ROWS;rpc_status:OK;"); + MetricName.named("BigQuerySink", "RpcRequestsCount*rpc_method:APPEND_ROWS;rpc_status:OK;"); MetricName histogramName = - MetricName.named("BigQuerySink", "RpcLatency-rpc_method:APPEND_ROWS;"); + MetricName.named("BigQuerySink", "RpcLatency*rpc_method:APPEND_ROWS;"); HistogramData.BucketType bucketType = HistogramData.ExponentialBuckets.of(1, 34); assertThat(testContainer.perWorkerCounters, IsMapContaining.hasKey(counterNameDisabledDeletes)); assertThat( @@ -202,7 +202,7 @@ public void testReportSuccessfulRpcMetrics() throws Exception { MetricName counterNameEnabledDeletes = MetricName.named( "BigQuerySink", - "RpcRequestsCount-rpc_method:APPEND_ROWS;rpc_status:OK;table_id:tableId;"); + "RpcRequestsCount*rpc_method:APPEND_ROWS;rpc_status:OK;table_id:tableId;"); assertThat(testContainer.perWorkerCounters, IsMapContaining.hasKey(counterNameEnabledDeletes)); assertThat( testContainer.perWorkerCounters.get(counterNameEnabledDeletes).getCumulative(), @@ -232,9 +232,9 @@ public void testReportFailedRPCMetrics_KnownGrpcError() throws Exception { c, BigQuerySinkMetrics.RpcMethod.APPEND_ROWS, "tableId"); MetricName counterNameDisabledDeletes = MetricName.named( - "BigQuerySink", "RpcRequestsCount-rpc_method:APPEND_ROWS;rpc_status:NOT_FOUND;"); + "BigQuerySink", "RpcRequestsCount*rpc_method:APPEND_ROWS;rpc_status:NOT_FOUND;"); MetricName histogramName = - MetricName.named("BigQuerySink", "RpcLatency-rpc_method:APPEND_ROWS;"); + MetricName.named("BigQuerySink", "RpcLatency*rpc_method:APPEND_ROWS;"); HistogramData.BucketType bucketType = HistogramData.ExponentialBuckets.of(1, 34); assertThat(testContainer.perWorkerCounters, IsMapContaining.hasKey(counterNameDisabledDeletes)); assertThat( @@ -255,7 +255,7 @@ public void testReportFailedRPCMetrics_KnownGrpcError() throws Exception { MetricName counterNameEnabledDeletes = MetricName.named( "BigQuerySink", - "RpcRequestsCount-rpc_method:APPEND_ROWS;rpc_status:NOT_FOUND;table_id:tableId;"); + "RpcRequestsCount*rpc_method:APPEND_ROWS;rpc_status:NOT_FOUND;table_id:tableId;"); assertThat(testContainer.perWorkerCounters, IsMapContaining.hasKey(counterNameEnabledDeletes)); assertThat( testContainer.perWorkerCounters.get(counterNameEnabledDeletes).getCumulative(), @@ -284,9 +284,9 @@ public void testReportFailedRPCMetrics_UnknownGrpcError() throws Exception { c, BigQuerySinkMetrics.RpcMethod.APPEND_ROWS, "tableId"); MetricName counterNameDisabledDeletes = MetricName.named( - "BigQuerySink", "RpcRequestsCount-rpc_method:APPEND_ROWS;rpc_status:UNKNOWN;"); + "BigQuerySink", "RpcRequestsCount*rpc_method:APPEND_ROWS;rpc_status:UNKNOWN;"); MetricName histogramName = - MetricName.named("BigQuerySink", "RpcLatency-rpc_method:APPEND_ROWS;"); + MetricName.named("BigQuerySink", "RpcLatency*rpc_method:APPEND_ROWS;"); HistogramData.BucketType bucketType = HistogramData.ExponentialBuckets.of(1, 34); assertThat(testContainer.perWorkerCounters, IsMapContaining.hasKey(counterNameDisabledDeletes)); assertThat( @@ -304,7 +304,7 @@ public void testReportFailedRPCMetrics_UnknownGrpcError() throws Exception { MetricName counterNameEnabledDeletes = MetricName.named( "BigQuerySink", - "RpcRequestsCount-rpc_method:APPEND_ROWS;rpc_status:UNKNOWN;table_id:tableId;"); + 
"RpcRequestsCount*rpc_method:APPEND_ROWS;rpc_status:UNKNOWN;table_id:tableId;"); assertThat(testContainer.perWorkerCounters, IsMapContaining.hasKey(counterNameEnabledDeletes)); assertThat( testContainer.perWorkerCounters.get(counterNameEnabledDeletes).getCumulative(), @@ -317,41 +317,46 @@ public void testReportFailedRPCMetrics_UnknownGrpcError() throws Exception { @Test public void testParseMetricName_noLabels() { String baseMetricName = "baseMetricName"; - BigQuerySinkMetrics.ParsedMetricName metricName = + BigQuerySinkMetrics.ParsedMetricName expectedName = BigQuerySinkMetrics.ParsedMetricName.create(baseMetricName); - assertThat(BigQuerySinkMetrics.parseMetricName(baseMetricName), equalTo(metricName)); + Optional parsedMetricName = + BigQuerySinkMetrics.parseMetricName(baseMetricName); + assertThat(parsedMetricName.isPresent(), equalTo(true)); + assertThat(parsedMetricName.get(), equalTo(expectedName)); } @Test public void testParseMetricName_successfulLabels() { - String metricName = "baseLabel-key1:val1;key2:val2;key3:val3;"; + String metricName = "baseLabel*key1:val1;key2:val2;key3:val3;"; ImmutableMap metricLabels = ImmutableMap.of("key1", "val1", "key2", "val2", "key3", "val3"); BigQuerySinkMetrics.ParsedMetricName expectedName = BigQuerySinkMetrics.ParsedMetricName.create("baseLabel", metricLabels); - assertThat(BigQuerySinkMetrics.parseMetricName(metricName), equalTo(expectedName)); - } + Optional parsedMetricName = + BigQuerySinkMetrics.parseMetricName(metricName); - @Test - public void testParseMetricName_malformedMetricName() { - String malformedMetricName = "baseLabel-key1:val1-key2:val2"; - assertThat(BigQuerySinkMetrics.parseMetricName(malformedMetricName), is(nullValue())); + assertThat(parsedMetricName.isPresent(), equalTo(true)); + assertThat(parsedMetricName.get(), equalTo(expectedName)); } @Test public void testParseMetricName_malformedMetricLabels() { - String metricName = "baseLabel-key1:val1:malformedField;key2:val2;"; + String metricName = "baseLabel*malformed_kv_pair;key2:val2;"; ImmutableMap metricLabels = ImmutableMap.of("key2", "val2"); BigQuerySinkMetrics.ParsedMetricName expectedName = BigQuerySinkMetrics.ParsedMetricName.create("baseLabel", metricLabels); - assertThat(BigQuerySinkMetrics.parseMetricName(metricName), equalTo(expectedName)); + Optional parsedMetricName = + BigQuerySinkMetrics.parseMetricName(metricName); + + assertThat(parsedMetricName.isPresent(), equalTo(true)); + assertThat(parsedMetricName.get(), equalTo(expectedName)); } @Test public void testParseMetricName_emptyString() { - assertThat(BigQuerySinkMetrics.parseMetricName(""), is(nullValue())); + assertThat(BigQuerySinkMetrics.parseMetricName("").isPresent(), equalTo(false)); } } diff --git a/sdks/java/io/google-cloud-platform/src/test/java/org/apache/beam/sdk/io/gcp/bigtable/BigtableReadIT.java b/sdks/java/io/google-cloud-platform/src/test/java/org/apache/beam/sdk/io/gcp/bigtable/BigtableReadIT.java index 704a895992c2..bc88858ebc33 100644 --- a/sdks/java/io/google-cloud-platform/src/test/java/org/apache/beam/sdk/io/gcp/bigtable/BigtableReadIT.java +++ b/sdks/java/io/google-cloud-platform/src/test/java/org/apache/beam/sdk/io/gcp/bigtable/BigtableReadIT.java @@ -17,7 +17,16 @@ */ package org.apache.beam.sdk.io.gcp.bigtable; +import com.google.cloud.bigtable.admin.v2.BigtableTableAdminClient; +import com.google.cloud.bigtable.admin.v2.BigtableTableAdminSettings; +import com.google.cloud.bigtable.admin.v2.models.CreateTableRequest; import com.google.cloud.bigtable.config.BigtableOptions; 
+import com.google.cloud.bigtable.data.v2.BigtableDataClient; +import com.google.cloud.bigtable.data.v2.BigtableDataSettings; +import com.google.cloud.bigtable.data.v2.models.RowMutation; +import java.io.IOException; +import java.util.Date; +import org.apache.beam.repackaged.core.org.apache.commons.lang3.StringUtils; import org.apache.beam.sdk.Pipeline; import org.apache.beam.sdk.extensions.gcp.options.GcpOptions; import org.apache.beam.sdk.options.PipelineOptionsFactory; @@ -25,6 +34,8 @@ import org.apache.beam.sdk.testing.TestPipeline; import org.apache.beam.sdk.transforms.Count; import org.apache.beam.sdk.values.PCollection; +import org.junit.After; +import org.junit.Before; import org.junit.Test; import org.junit.runner.RunWith; import org.junit.runners.JUnit4; @@ -32,18 +43,62 @@ /** End-to-end tests of BigtableRead. */ @RunWith(JUnit4.class) public class BigtableReadIT { + private static final String COLUMN_FAMILY_NAME = "cf"; - @Test - public void testE2EBigtableRead() throws Exception { + private String project; + + private BigtableTestOptions options; + private String tableId = String.format("BigtableReadIT-%tF-% count = + p.apply( + BigtableIO.read() + .withBigtableOptions(bigtableOptionsBuilder) + .withTableId(tableId) + .withMaxBufferElementCount(10)) + .apply(Count.globally()); + PAssert.thatSingleton(count).isEqualTo(numRows); + p.run(); + } } diff --git a/sdks/java/io/google-cloud-platform/src/test/java/org/apache/beam/sdk/io/gcp/healthcare/FhirIOSearchIT.java b/sdks/java/io/google-cloud-platform/src/test/java/org/apache/beam/sdk/io/gcp/healthcare/FhirIOSearchIT.java index fc92a568ba4d..ebd94123d499 100644 --- a/sdks/java/io/google-cloud-platform/src/test/java/org/apache/beam/sdk/io/gcp/healthcare/FhirIOSearchIT.java +++ b/sdks/java/io/google-cloud-platform/src/test/java/org/apache/beam/sdk/io/gcp/healthcare/FhirIOSearchIT.java @@ -43,7 +43,6 @@ import org.apache.beam.vendor.guava.v32_1_2_jre.com.google.common.collect.ImmutableMap; import org.junit.After; import org.junit.Before; -import org.junit.Ignore; import org.junit.Rule; import org.junit.Test; import org.junit.runner.RunWith; @@ -66,7 +65,7 @@ public static Collection versions() { private static final String BASE_STORE_ID = "FHIR_store_search_it_" + System.currentTimeMillis() + "_" + new SecureRandom().nextInt(32); private String fhirStoreId; - private static final int MAX_NUM_OF_SEARCHES = 50; + private static final int MAX_NUM_OF_SEARCHES = 20; private List> input = new ArrayList<>(); private List>> genericParametersInput = new ArrayList<>(); private static final String KEY = "key"; @@ -96,9 +95,9 @@ public void setup() throws Exception { JsonArray fhirResources = JsonParser.parseString(bundles.get(0)).getAsJsonObject().getAsJsonArray("entry"); - Map searchParameters = ImmutableMap.of("_count", "50"); + Map searchParameters = ImmutableMap.of("_count", "20"); Map> genericSearchParameters = - ImmutableMap.of("_count", Arrays.asList(50)); + ImmutableMap.of("_count", Arrays.asList(20)); // Include a non-resource type search. 
input.add(FhirSearchParameter.of("", KEY, searchParameters)); @@ -127,7 +126,6 @@ public void teardown() throws IOException { } } - @Ignore("https://github.com/apache/beam/issues/28505") @Test public void testFhirIOSearch() { pipeline.getOptions().as(DirectOptions.class).setBlockOnRun(false); @@ -157,7 +155,6 @@ public void testFhirIOSearch() { pipeline.run().waitUntilFinish(); } - @Ignore("https://github.com/apache/beam/issues/28505") @Test public void testFhirIOSearchWithGenericParameters() { pipeline.getOptions().as(DirectOptions.class).setBlockOnRun(false); diff --git a/sdks/java/io/google-cloud-platform/src/test/java/org/apache/beam/sdk/io/gcp/pubsublite/internal/PubsubLiteDlqTest.java b/sdks/java/io/google-cloud-platform/src/test/java/org/apache/beam/sdk/io/gcp/pubsublite/internal/PubsubLiteDlqTest.java index e23536d800e5..4acf0a1149e1 100644 --- a/sdks/java/io/google-cloud-platform/src/test/java/org/apache/beam/sdk/io/gcp/pubsublite/internal/PubsubLiteDlqTest.java +++ b/sdks/java/io/google-cloud-platform/src/test/java/org/apache/beam/sdk/io/gcp/pubsublite/internal/PubsubLiteDlqTest.java @@ -31,6 +31,7 @@ import java.util.HashMap; import java.util.List; import java.util.Map; +import org.apache.beam.sdk.extensions.protobuf.ProtoByteUtils; import org.apache.beam.sdk.io.gcp.pubsublite.PubsubLiteIO; import org.apache.beam.sdk.io.gcp.pubsublite.PubsubLiteReadSchemaTransformProvider; import org.apache.beam.sdk.io.gcp.pubsublite.PubsubLiteReadSchemaTransformProvider.ErrorFn; @@ -252,6 +253,67 @@ public class PubsubLiteDlqTest { .build()) .build()); + private static final String PROTO_STRING_SCHEMA = + "syntax = \"proto3\";\n" + + "package com.test.proto;" + + "\n" + + "message MyMessage {\n" + + " int32 id = 1;\n" + + " string name = 2;\n" + + " bool active = 3;\n" + + "\n" + + " // Nested field\n" + + " message Address {\n" + + " string street = 1;\n" + + " string city = 2;\n" + + " string state = 3;\n" + + " string zip_code = 4;\n" + + " }\n" + + "\n" + + " Address address = 4;\n" + + "}"; + + private static final Schema BEAM_PROTO_SCHEMA = + Schema.builder() + .addField("id", Schema.FieldType.INT32) + .addField("name", Schema.FieldType.STRING) + .addField("active", Schema.FieldType.BOOLEAN) + .addField( + "address", + Schema.FieldType.row( + Schema.builder() + .addField("city", Schema.FieldType.STRING) + .addField("street", Schema.FieldType.STRING) + .addField("state", Schema.FieldType.STRING) + .addField("zip_code", Schema.FieldType.STRING) + .build())) + .build(); + + private static final Row INPUT_ROW = + Row.withSchema(BEAM_PROTO_SCHEMA) + .withFieldValue("id", 1234) + .withFieldValue("name", "Doe") + .withFieldValue("active", false) + .withFieldValue("address.city", "seattle") + .withFieldValue("address.street", "fake street") + .withFieldValue("address.zip_code", "TO-1234") + .withFieldValue("address.state", "wa") + .build(); + private static final SerializableFunction INPUT_MAPPER = + ProtoByteUtils.getRowToProtoBytesFromSchema(PROTO_STRING_SCHEMA, "com.test.proto.MyMessage"); + + private static final byte[] INPUT_SOURCE = INPUT_MAPPER.apply(INPUT_ROW); + + private static final List INPUT_MESSAGES = + Collections.singletonList( + SequencedMessage.newBuilder() + .setMessage( + PubSubMessage.newBuilder() + .setData(ByteString.copyFrom(INPUT_SOURCE)) + .putAllAttributes(ATTRIBUTE_VALUES_MAP) + .build()) + .build()); + final SerializableFunction valueMapper = JsonUtils.getJsonBytesToRowFunction(BEAM_SCHEMA); @@ -471,4 +533,31 @@ public void testPubsubLiteErrorFnWithDedupingSuccess() { 
p.run().waitUntilFinish(); } + + @Test + public void testPubSubLiteErrorFnReadProto() { + Schema errorSchema = ErrorHandling.errorSchemaBytes(); + + List attributes = new ArrayList<>(); + String attributesMap = ""; + Schema beamAttributeSchema = + PubsubLiteReadSchemaTransformProvider.buildSchemaWithAttributes( + BEAM_PROTO_SCHEMA, attributes, attributesMap); + + SerializableFunction protoValueMapper = + ProtoByteUtils.getProtoBytesToRowFromSchemaFunction( + PROTO_STRING_SCHEMA, "com.test.proto.MyMessage"); + + PCollection input = p.apply(Create.of(INPUT_MESSAGES)); + PCollectionTuple output = + input.apply( + ParDo.of(new ErrorFn("Read-Error-Counter", protoValueMapper, errorSchema, Boolean.TRUE)) + .withOutputTags(OUTPUT_TAG, TupleTagList.of(ERROR_TAG))); + + output.get(OUTPUT_TAG).setRowSchema(beamAttributeSchema); + output.get(ERROR_TAG).setRowSchema(errorSchema); + + PAssert.that(output.get(OUTPUT_TAG)).containsInAnyOrder(INPUT_ROW); + p.run().waitUntilFinish(); + } } diff --git a/sdks/java/io/google-cloud-platform/src/test/java/org/apache/beam/sdk/io/gcp/pubsublite/internal/PubsubLiteWriteDlqTest.java b/sdks/java/io/google-cloud-platform/src/test/java/org/apache/beam/sdk/io/gcp/pubsublite/internal/PubsubLiteWriteDlqTest.java index eac0e4264c1a..5afa4b7e5162 100644 --- a/sdks/java/io/google-cloud-platform/src/test/java/org/apache/beam/sdk/io/gcp/pubsublite/internal/PubsubLiteWriteDlqTest.java +++ b/sdks/java/io/google-cloud-platform/src/test/java/org/apache/beam/sdk/io/gcp/pubsublite/internal/PubsubLiteWriteDlqTest.java @@ -27,6 +27,7 @@ import java.util.HashMap; import java.util.List; import java.util.Map; +import org.apache.beam.sdk.extensions.protobuf.ProtoByteUtils; import org.apache.beam.sdk.io.gcp.pubsublite.PubsubLiteWriteSchemaTransformProvider; import org.apache.beam.sdk.io.gcp.pubsublite.PubsubLiteWriteSchemaTransformProvider.ErrorCounterFn; import org.apache.beam.sdk.schemas.Schema; @@ -111,6 +112,14 @@ public class PubsubLiteWriteDlqTest { .withFieldValue("key2", "second_key") .build()); + private static final String PROTO_STRING_SCHEMA = + "syntax = \"proto3\";\n" + + "package com.test.proto;" + + "\n" + + "message MyMessage {\n" + + " string name = 1;\n" + + "}"; + private static final Map ATTRIBUTE_VALUES_MAP = new HashMap<>(); static { @@ -239,4 +248,26 @@ public void testPubsubLiteErrorFnSuccessWithAttributesAndDedupingSuccess() { PAssert.that(count).containsInAnyOrder(Collections.singletonList(3L)); p.run().waitUntilFinish(); } + + @Test + public void testPubsubLiteErrorFnSuccessProto() { + Schema errorSchema = ErrorHandling.errorSchemaBytes(); + + SerializableFunction valueMapperProto = + ProtoByteUtils.getRowToProtoBytesFromSchema( + PROTO_STRING_SCHEMA, "com.test.proto.MyMessage"); + + PCollection input = p.apply(Create.of(ROWS)); + PCollectionTuple output = + input.apply( + ParDo.of( + new ErrorCounterFn("ErrorCounter", valueMapperProto, errorSchema, Boolean.TRUE)) + .withOutputTags(OUTPUT_TAG, TupleTagList.of(ERROR_TAG))); + + output.get(ERROR_TAG).setRowSchema(errorSchema); + + PAssert.that(output.get(OUTPUT_TAG).apply(Count.globally())) + .containsInAnyOrder(Collections.singletonList(3L)); + p.run().waitUntilFinish(); + } } diff --git a/sdks/java/io/jdbc/src/test/java/org/apache/beam/sdk/io/jdbc/JdbcIOAutoPartitioningIT.java b/sdks/java/io/jdbc/src/test/java/org/apache/beam/sdk/io/jdbc/JdbcIOAutoPartitioningIT.java index 0042d93aedfc..dc94b65cf25a 100644 --- a/sdks/java/io/jdbc/src/test/java/org/apache/beam/sdk/io/jdbc/JdbcIOAutoPartitioningIT.java +++ 
b/sdks/java/io/jdbc/src/test/java/org/apache/beam/sdk/io/jdbc/JdbcIOAutoPartitioningIT.java @@ -125,7 +125,10 @@ public void evaluate() throws Throwable { } }; - @ClassRule public static JdbcDatabaseContainer mysql = new MySQLContainer<>("mysql"); + // TODO(yathu) unpin tag when the fix of + // https://github.com/testcontainers/testcontainers-java/issues/8130 + // released and upgraded in Beam + @ClassRule public static JdbcDatabaseContainer mysql = new MySQLContainer<>("mysql:8.2"); @ClassRule public static JdbcDatabaseContainer postgres = new PostgreSQLContainer<>("postgres"); diff --git a/sdks/java/io/kafka/src/test/java/org/apache/beam/sdk/io/kafka/ReadFromKafkaDoFnTest.java b/sdks/java/io/kafka/src/test/java/org/apache/beam/sdk/io/kafka/ReadFromKafkaDoFnTest.java index 845d974af0b4..48b5b060a295 100644 --- a/sdks/java/io/kafka/src/test/java/org/apache/beam/sdk/io/kafka/ReadFromKafkaDoFnTest.java +++ b/sdks/java/io/kafka/src/test/java/org/apache/beam/sdk/io/kafka/ReadFromKafkaDoFnTest.java @@ -50,8 +50,6 @@ import org.apache.beam.sdk.transforms.errorhandling.BadRecord; import org.apache.beam.sdk.transforms.errorhandling.ErrorHandler.DefaultErrorHandler; import org.apache.beam.sdk.transforms.splittabledofn.OffsetRangeTracker; -import org.apache.beam.sdk.transforms.windowing.BoundedWindow; -import org.apache.beam.sdk.transforms.windowing.PaneInfo; import org.apache.beam.sdk.values.KV; import org.apache.beam.sdk.values.PCollection; import org.apache.beam.sdk.values.PCollection.IsBounded; @@ -342,15 +340,6 @@ public void outputWithTimestamp( records.add(output); } - @Override - public void outputWindowedValue( - T output, - Instant timestamp, - Collection windows, - PaneInfo paneInfo) { - throw new UnsupportedOperationException("Not expecting outputWindowedValue"); - } - public List getOutputs() { return this.records; } diff --git a/sdks/java/io/pulsar/src/test/java/org/apache/beam/sdk/io/pulsar/ReadFromPulsarDoFnTest.java b/sdks/java/io/pulsar/src/test/java/org/apache/beam/sdk/io/pulsar/ReadFromPulsarDoFnTest.java index b72fe423efb1..273a1915d2bb 100644 --- a/sdks/java/io/pulsar/src/test/java/org/apache/beam/sdk/io/pulsar/ReadFromPulsarDoFnTest.java +++ b/sdks/java/io/pulsar/src/test/java/org/apache/beam/sdk/io/pulsar/ReadFromPulsarDoFnTest.java @@ -21,14 +21,11 @@ import static org.junit.Assert.assertTrue; import java.util.ArrayList; -import java.util.Collection; import java.util.List; import org.apache.beam.sdk.io.range.OffsetRange; import org.apache.beam.sdk.transforms.DoFn; import org.apache.beam.sdk.transforms.SerializableFunction; import org.apache.beam.sdk.transforms.splittabledofn.OffsetRangeTracker; -import org.apache.beam.sdk.transforms.windowing.BoundedWindow; -import org.apache.beam.sdk.transforms.windowing.PaneInfo; import org.apache.pulsar.client.api.MessageId; import org.apache.pulsar.client.api.PulsarClient; import org.apache.pulsar.client.internal.DefaultImplementation; @@ -176,16 +173,6 @@ public void outputWithTimestamp( records.add(output); } - @Override - public void outputWindowedValue( - PulsarMessage output, - Instant timestamp, - Collection windows, - PaneInfo paneInfo) { - throw new UnsupportedOperationException( - "unsupported outputWindowedValue in mock outputreceiver"); - } - public List getOutputs() { return records; } diff --git a/sdks/java/io/sparkreceiver/2/src/test/java/org/apache/beam/sdk/io/sparkreceiver/ReadFromSparkReceiverWithOffsetDoFnTest.java 
b/sdks/java/io/sparkreceiver/2/src/test/java/org/apache/beam/sdk/io/sparkreceiver/ReadFromSparkReceiverWithOffsetDoFnTest.java index bb0e6524241d..33827164c6b7 100644 --- a/sdks/java/io/sparkreceiver/2/src/test/java/org/apache/beam/sdk/io/sparkreceiver/ReadFromSparkReceiverWithOffsetDoFnTest.java +++ b/sdks/java/io/sparkreceiver/2/src/test/java/org/apache/beam/sdk/io/sparkreceiver/ReadFromSparkReceiverWithOffsetDoFnTest.java @@ -22,15 +22,12 @@ import static org.junit.Assert.assertTrue; import java.util.ArrayList; -import java.util.Collection; import java.util.List; import org.apache.beam.sdk.io.range.OffsetRange; import org.apache.beam.sdk.transforms.DoFn; import org.apache.beam.sdk.transforms.splittabledofn.ManualWatermarkEstimator; import org.apache.beam.sdk.transforms.splittabledofn.OffsetRangeTracker; import org.apache.beam.sdk.transforms.splittabledofn.SplitResult; -import org.apache.beam.sdk.transforms.windowing.BoundedWindow; -import org.apache.beam.sdk.transforms.windowing.PaneInfo; import org.checkerframework.checker.initialization.qual.Initialized; import org.checkerframework.checker.nullness.qual.NonNull; import org.checkerframework.checker.nullness.qual.UnknownKeyFor; @@ -67,16 +64,6 @@ public void outputWithTimestamp( records.add(output); } - @Override - public void outputWindowedValue( - String output, - Instant timestamp, - Collection windows, - PaneInfo paneInfo) { - throw new UnsupportedOperationException( - "Not expecting to receive call to outputWindowedValue"); - } - public List getOutputs() { return this.records; } diff --git a/sdks/java/transform-service/app/build.gradle b/sdks/java/transform-service/app/build.gradle new file mode 100644 index 000000000000..bf7a47a462e2 --- /dev/null +++ b/sdks/java/transform-service/app/build.gradle @@ -0,0 +1,43 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * License); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an AS IS BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +apply plugin: 'org.apache.beam.module' +apply plugin: 'application' + +applyJavaNature( + automaticModuleName: 'org.apache.beam.sdk.transformservice.app', + exportJavadoc: false, + validateShadowJar: false, + shadowClosure: {}, +) +mainClassName = "org.apache.beam.sdk.transformservice.launcher.TransformServiceLauncher" + +description = "Apache Beam :: SDKs :: Java :: Transform Service :: App" +ext.summary = """Runs a transform service.""" + +dependencies { + implementation project(":sdks:java:transform-service:launcher") + permitUnusedDeclared project(":sdks:java:transform-service:launcher") + runtimeOnly library.java.slf4j_jdk14 +} + +jar { + manifest { + attributes 'Main-Class': application.mainClass + } +} diff --git a/sdks/java/transform-service/build.gradle b/sdks/java/transform-service/build.gradle index 702479fdf30a..2fee0b4f1708 100644 --- a/sdks/java/transform-service/build.gradle +++ b/sdks/java/transform-service/build.gradle @@ -19,7 +19,7 @@ apply plugin: 'org.apache.beam.module' applyJavaNature( - automaticModuleName: 'org.apache.beam.sdk.transform.service', + automaticModuleName: 'org.apache.beam.sdk.transformservice', exportJavadoc: false, validateShadowJar: false, shadowClosure: {}, @@ -42,7 +42,6 @@ dependencies { implementation project(path: ":model:job-management", configuration: "shadow") implementation project(path: ":sdks:java:core", configuration: "shadow") implementation project(path: ":runners:core-construction-java") - implementation project(path: ":sdks:java:fn-execution") implementation library.java.vendored_grpc_1_60_1 implementation library.java.vendored_guava_32_1_2_jre implementation library.java.jackson_annotations diff --git a/sdks/java/transform-service/launcher/build.gradle b/sdks/java/transform-service/launcher/build.gradle index 0952f37109eb..5ae514af1dc9 100644 --- a/sdks/java/transform-service/launcher/build.gradle +++ b/sdks/java/transform-service/launcher/build.gradle @@ -16,17 +16,11 @@ * limitations under the License. */ apply plugin: 'org.apache.beam.module' -apply plugin: 'application' -mainClassName = "org.apache.beam.sdk.transformservice.launcher.TransformServiceLauncher" applyJavaNature( - automaticModuleName: 'org.apache.beam.sdk.transform.service', - exportJavadoc: false, - validateShadowJar: false, - shadowClosure: {}, + automaticModuleName: 'org.apache.beam.sdk.transformservice.launcher', ) - description = "Apache Beam :: SDKs :: Java :: Transform Service :: Launcher" ext.summary = """Contains code that can be used to run an transform service.""" @@ -34,20 +28,18 @@ ext.summary = """Contains code that can be used to run an transform service.""" // Exclude tests that need a runner test { systemProperty "beamUseDummyRunner", "true" - useJUnit { - excludeCategories "org.apache.beam.sdk.testing.NeedsRunner" - } + useJUnit { } } dependencies { - shadow library.java.vendored_guava_32_1_2_jre - shadow library.java.slf4j_api - shadow library.java.args4j - shadow library.java.error_prone_annotations + // This library is designed to be light-weight, having minimum amount of dependencies and is used by Beam runner artifacts. 
+ implementation library.java.vendored_guava_32_1_2_jre + implementation library.java.slf4j_api + implementation library.java.args4j + compileOnly library.java.error_prone_annotations permitUnusedDeclared(library.java.error_prone_annotations) testImplementation library.java.junit testImplementation library.java.mockito_core - testImplementation project(path: ":sdks:java:core") } sourceSets { @@ -58,9 +50,3 @@ sourceSets { output.resourcesDir = "$buildDir/resources/docker-compose/$name" } } - -jar { - manifest { - attributes 'Main-Class': application.mainClass - } -} diff --git a/sdks/python/apache_beam/io/gcp/bigquery.py b/sdks/python/apache_beam/io/gcp/bigquery.py index bba8b8a4af75..7648ab4064da 100644 --- a/sdks/python/apache_beam/io/gcp/bigquery.py +++ b/sdks/python/apache_beam/io/gcp/bigquery.py @@ -361,6 +361,7 @@ def chain_after(result): import json import logging import random +import secrets import time import uuid import warnings @@ -2925,8 +2926,9 @@ def __init__( self.table = table self.validate() - # We use this internal object ID to generate BigQuery export directories. - self.obj_id = random.randint(0, 100000) + # We use this internal object ID to generate BigQuery export directories + # and to create BigQuery job names + self.obj_id = '%d_%s' % (int(time.time()), secrets.token_hex(3)) def validate(self): if self.table is not None and self.query is not None: diff --git a/sdks/python/apache_beam/io/gcp/gcsio.py b/sdks/python/apache_beam/io/gcp/gcsio.py index 087d32a9e054..b5a291428767 100644 --- a/sdks/python/apache_beam/io/gcp/gcsio.py +++ b/sdks/python/apache_beam/io/gcp/gcsio.py @@ -175,17 +175,14 @@ def open( ValueError: Invalid open file mode. """ bucket_name, blob_name = parse_gcs_path(filename) - bucket = self.client.get_bucket(bucket_name) + bucket = self.client.bucket(bucket_name) if mode == 'r' or mode == 'rb': - blob = bucket.get_blob(blob_name) + blob = bucket.blob(blob_name) return BeamBlobReader(blob, chunk_size=read_buffer_size) elif mode == 'w' or mode == 'wb': - blob = bucket.get_blob(blob_name) - if not blob: - blob = storage.Blob(blob_name, bucket) + blob = bucket.blob(blob_name) return BeamBlobWriter(blob, mime_type) - else: raise ValueError('Invalid file open mode: %s.' % mode) @@ -199,7 +196,7 @@ def delete(self, path): """ bucket_name, blob_name = parse_gcs_path(path) try: - bucket = self.client.get_bucket(bucket_name) + bucket = self.client.bucket(bucket_name) bucket.delete_blob(blob_name) except NotFound: return @@ -208,7 +205,8 @@ def delete_batch(self, paths): """Deletes the objects at the given GCS paths. Args: - paths: List of GCS file path patterns in the form gs:///, + paths: List of GCS file path patterns or Dict with GCS file path patterns + as keys. The patterns are in the form gs:///, but not to exceed MAX_BATCH_OPERATION_SIZE in length. 
Returns: List of tuples of (path, exception) in the same order as the @@ -217,6 +215,7 @@ def delete_batch(self, paths): """ final_results = [] s = 0 + if not isinstance(paths, list): paths = list(iter(paths)) while s < len(paths): if (s + MAX_BATCH_OPERATION_SIZE) < len(paths): current_paths = paths[s:s + MAX_BATCH_OPERATION_SIZE] @@ -226,16 +225,15 @@ def delete_batch(self, paths): with current_batch: for path in current_paths: bucket_name, blob_name = parse_gcs_path(path) - bucket = self.client.get_bucket(bucket_name) + bucket = self.client.bucket(bucket_name) bucket.delete_blob(blob_name) for i, path in enumerate(current_paths): error_code = None - for j in range(2): - resp = current_batch._responses[2 * i + j] - if resp.status_code >= 400 and resp.status_code != 404: - error_code = resp.status_code - break + resp = current_batch._responses[i] + if resp.status_code >= 400 and resp.status_code != 404: + error_code = resp.status_code + break final_results.append((path, error_code)) s += MAX_BATCH_OPERATION_SIZE @@ -256,11 +254,9 @@ def copy(self, src, dest): """ src_bucket_name, src_blob_name = parse_gcs_path(src) dest_bucket_name, dest_blob_name= parse_gcs_path(dest, object_optional=True) - src_bucket = self.get_bucket(src_bucket_name) - src_blob = src_bucket.get_blob(src_blob_name) - if not src_blob: - raise NotFound("Source %s not found", src) - dest_bucket = self.get_bucket(dest_bucket_name) + src_bucket = self.client.bucket(src_bucket_name) + src_blob = src_bucket.blob(src_blob_name) + dest_bucket = self.client.bucket(dest_bucket_name) if not dest_blob_name: dest_blob_name = None src_bucket.copy_blob(src_blob, dest_bucket, new_name=dest_blob_name) @@ -289,19 +285,18 @@ def copy_batch(self, src_dest_pairs): for pair in current_pairs: src_bucket_name, src_blob_name = parse_gcs_path(pair[0]) dest_bucket_name, dest_blob_name = parse_gcs_path(pair[1]) - src_bucket = self.client.get_bucket(src_bucket_name) - src_blob = src_bucket.get_blob(src_blob_name) - dest_bucket = self.client.get_bucket(dest_bucket_name) + src_bucket = self.client.bucket(src_bucket_name) + src_blob = src_bucket.blob(src_blob_name) + dest_bucket = self.client.bucket(dest_bucket_name) src_bucket.copy_blob(src_blob, dest_bucket, dest_blob_name) for i, pair in enumerate(current_pairs): error_code = None - for j in range(4): - resp = current_batch._responses[4 * i + j] - if resp.status_code >= 400: - error_code = resp.status_code - break + resp = current_batch._responses[i] + if resp.status_code >= 400: + error_code = resp.status_code + break final_results.append((pair[0], pair[1], error_code)) s += MAX_BATCH_OPERATION_SIZE @@ -415,12 +410,12 @@ def _gcs_object(self, path): """Returns a gcs object for the given path This method does not perform glob expansion. Hence the given path must be - for a single GCS object. + for a single GCS object. The method will make HTTP requests. Returns: GCS object. 
""" bucket_name, blob_name = parse_gcs_path(path) - bucket = self.client.get_bucket(bucket_name) + bucket = self.client.bucket(bucket_name) blob = bucket.get_blob(blob_name) if blob: return blob @@ -468,7 +463,7 @@ def list_files(self, path, with_metadata=False): _LOGGER.debug("Starting the file information of the input") else: _LOGGER.debug("Starting the size estimation of the input") - bucket = self.client.get_bucket(bucket_name) + bucket = self.client.bucket(bucket_name) response = self.client.list_blobs(bucket, prefix=prefix) for item in response: file_name = 'gs://%s/%s' % (item.bucket.name, item.name) diff --git a/sdks/python/apache_beam/io/gcp/gcsio_test.py b/sdks/python/apache_beam/io/gcp/gcsio_test.py index f8b580c91c95..c9a7fb72f779 100644 --- a/sdks/python/apache_beam/io/gcp/gcsio_test.py +++ b/sdks/python/apache_beam/io/gcp/gcsio_test.py @@ -43,9 +43,15 @@ class FakeGcsClient(object): def __init__(self): self.buckets = {} + def _add_bucket(self, bucket): + self.buckets[bucket.name] = bucket + return self.buckets[bucket.name] + + def bucket(self, name): + return FakeBucket(self, name) + def create_bucket(self, name): - self.buckets[name] = FakeBucket(self, name) - return self.buckets[name] + return self._add_bucket(self.bucket(name)) def get_bucket(self, name): if name in self.buckets: @@ -92,40 +98,51 @@ def __init__(self, client, name): self.name = name self.blobs = {} self.default_kms_key_name = None - self.client.buckets[name] = self - def add_blob(self, blob): - self.blobs[blob.name] = blob + def _get_canonical_bucket(self): + return self.client.get_bucket(self.name) - def create_blob(self, name): + def _create_blob(self, name): return FakeBlob(name, self) + def add_blob(self, blob): + bucket = self._get_canonical_bucket() + bucket.blobs[blob.name] = blob + return bucket.blobs[blob.name] + + def blob(self, name): + return self._create_blob(name) + def copy_blob(self, blob, dest, new_name=None): + if self.get_blob(blob.name) is None: + raise NotFound("source blob not found") if not new_name: new_name = blob.name - dest.blobs[new_name] = blob - dest.blobs[new_name].name = new_name - dest.blobs[new_name].bucket = dest - return dest.blobs[new_name] + new_blob = FakeBlob(new_name, dest) + dest.add_blob(new_blob) + return new_blob def get_blob(self, blob_name): - if blob_name in self.blobs: - return self.blobs[blob_name] + bucket = self._get_canonical_bucket() + if blob_name in bucket.blobs: + return bucket.blobs[blob_name] else: return None def lookup_blob(self, name): - if name in self.blobs: - return self.blobs[name] + bucket = self._get_canonical_bucket() + if name in bucket.blobs: + return bucket.blobs[name] else: - return self.create_blob(name) + return bucket.create_blob(name) def set_default_kms_key_name(self, name): self.default_kms_key_name = name def delete_blob(self, name): - if name in self.blobs: - del self.blobs[name] + bucket = self._get_canonical_bucket() + if name in bucket.blobs: + del bucket.blobs[name] class FakeBlob(object): @@ -151,11 +168,18 @@ def __init__( self.updated = updated self._fail_when_getting_metadata = fail_when_getting_metadata self._fail_when_reading = fail_when_reading - self.bucket.add_blob(self) def delete(self): - if self.name in self.bucket.blobs: - del self.bucket.blobs[self.name] + self.bucket.delete_blob(self.name) + + def download_as_bytes(self, **kwargs): + blob = self.bucket.get_blob(self.name) + if blob is None: + raise NotFound("blob not found") + return blob.contents + + def __eq__(self, other): + return 
self.bucket.get_blob(self.name) is other.bucket.get_blob(other.name) @unittest.skipIf(NotFound is None, 'GCP dependencies are not installed') @@ -224,6 +248,7 @@ def _insert_random_file( updated=updated, fail_when_getting_metadata=fail_when_getting_metadata, fail_when_reading=fail_when_reading) + bucket.add_blob(blob) return blob def setUp(self): @@ -475,7 +500,25 @@ def test_list_prefix(self): def test_downloader_fail_non_existent_object(self): file_name = 'gs://gcsio-metrics-test/dummy_mode_file' with self.assertRaises(NotFound): - self.gcs.open(file_name, 'r') + with self.gcs.open(file_name, 'r') as f: + f.read(1) + + def test_blob_delete(self): + file_name = 'gs://gcsio-test/delete_me' + file_size = 1024 + bucket_name, blob_name = gcsio.parse_gcs_path(file_name) + # Test deletion of non-existent file. + bucket = self.client.get_bucket(bucket_name) + self.gcs.delete(file_name) + + self._insert_random_file(self.client, file_name, file_size) + self.assertTrue(blob_name in bucket.blobs) + + blob = bucket.get_blob(blob_name) + self.assertIsNotNone(blob) + + blob.delete() + self.assertFalse(blob_name in bucket.blobs) if __name__ == '__main__': diff --git a/sdks/python/apache_beam/utils/transform_service_launcher.py b/sdks/python/apache_beam/utils/transform_service_launcher.py index ac492513aba5..ca3e456c5238 100644 --- a/sdks/python/apache_beam/utils/transform_service_launcher.py +++ b/sdks/python/apache_beam/utils/transform_service_launcher.py @@ -35,7 +35,7 @@ _COMMAND_POSSIBLE_VALUES = ['up', 'down', 'ps'] -_EXPANSION_SERVICE_LAUNCHER_JAR = ':sdks:java:transform-service:launcher:build' +_EXPANSION_SERVICE_LAUNCHER_JAR = ':sdks:java:transform-service:app:build' class TransformServiceLauncher(object): diff --git a/sdks/python/apache_beam/yaml/standard_io.yaml b/sdks/python/apache_beam/yaml/standard_io.yaml index 4d26ce96b677..d63729f1676c 100644 --- a/sdks/python/apache_beam/yaml/standard_io.yaml +++ b/sdks/python/apache_beam/yaml/standard_io.yaml @@ -71,6 +71,7 @@ 'topic': 'topic' 'bootstrap_servers': 'bootstrapServers' 'producer_config_updates': 'producerConfigUpdates' + 'error_handling': 'errorHandling' 'file_descriptor_path': 'fileDescriptorPath' 'message_name': 'messageName' 'schema': 'schema' @@ -97,6 +98,9 @@ 'attributes': 'attributes' 'attribute_map': 'attributeMap' 'attribute_id': 'attributeId' + 'error_handling': 'errorHandling' + 'file_descriptor_path': 'fileDescriptorPath' + 'message_name': 'messageName' 'WriteToPubSubLite': 'project': 'project' 'format': 'format' @@ -104,6 +108,10 @@ 'location': 'location' 'attributes': 'attributes' 'attribute_id': 'attributeId' + 'error_handling': 'errorHandling' + 'file_descriptor_path': 'fileDescriptorPath' + 'message_name': 'messageName' + 'schema': 'schema' underlying_provider: type: beamJar transforms: diff --git a/sdks/python/build.gradle b/sdks/python/build.gradle index ab6f75fc653b..7f2bc7f5d423 100644 --- a/sdks/python/build.gradle +++ b/sdks/python/build.gradle @@ -94,17 +94,32 @@ platform_identifiers_map.each { platform, idsuffix -> // generated installable Python SDK package doLast { - exec { - environment CIBW_BUILD: "cp${pyversion}-${idsuffix}" - environment CIBW_ENVIRONMENT: "SETUPTOOLS_USE_DISTUTILS=stdlib" - // note: sync cibuildwheel version with GitHub Action - // .github/workflow/build_wheel.yml:build_wheels "Install cibuildwheel" step - // note(https://github.com/pypa/cibuildwheel/issues/1692): cibuildwheel appears to timeout occasionally. - executable 'sh' - args '-c', ". 
${envdir}/bin/activate && " + - "pip install cibuildwheel==2.9.0 && " + - "cibuildwheel --print-build-identifiers --platform ${platform} --archs ${archs} && " + - "for i in {1..3}; do cibuildwheel --output-dir ${buildDir} --platform ${platform} --archs ${archs} && break; done" + int maxRetries = 3 + int retryCount = 0 + // note(https://github.com/pypa/cibuildwheel/issues/1692): cibuildwheel appears to timeout occasionally. + while (retryCount < maxRetries) { + try { + exec { + environment CIBW_BUILD: "cp${pyversion}-${idsuffix}" + environment CIBW_ENVIRONMENT: "SETUPTOOLS_USE_DISTUTILS=stdlib" + executable 'sh' + args '-c', ". ${envdir}/bin/activate && " + + // note: sync cibuildwheel version with GitHub Action + // .github/workflows/build_wheel.yml:build_wheels "Install cibuildwheel" step + "pip install cibuildwheel==2.9.0 && " + + "cibuildwheel --print-build-identifiers --platform ${platform} --archs ${archs} && " + + "cibuildwheel --output-dir ${buildDir} --platform ${platform} --archs ${archs} " + } + break; + } + catch (Exception e) { + retryCount++ + if (retryCount < maxRetries) { + println "cibuildwheel failed on attempt ${retryCount}. Will retry." + } else { + throw e + } + } } } } diff --git a/settings.gradle.kts b/settings.gradle.kts index b1b03b55add0..512820fce3f1 100644 --- a/settings.gradle.kts +++ b/settings.gradle.kts @@ -203,7 +203,6 @@ include(":sdks:java:extensions:sql:udf") include(":sdks:java:extensions:sql:udf-test-provider") include(":sdks:java:extensions:timeseries") include(":sdks:java:extensions:zetasketch") -include(":sdks:java:fn-execution") include(":sdks:java:harness") include(":sdks:java:harness:jmh") include(":sdks:java:io:amazon-web-services") @@ -278,6 +277,7 @@ include(":sdks:java:testing:test-utils") include(":sdks:java:testing:tpcds") include(":sdks:java:testing:watermarks") include(":sdks:java:transform-service") +include(":sdks:java:transform-service:app") include(":sdks:java:transform-service:launcher") include(":sdks:java:transform-service:controller-container") include(":sdks:python") diff --git a/vendor/grpc-1_60_1/build.gradle b/vendor/grpc-1_60_1/build.gradle index 1617484c1c52..834c496d9ca4 100644 --- a/vendor/grpc-1_60_1/build.gradle +++ b/vendor/grpc-1_60_1/build.gradle @@ -23,7 +23,7 @@ plugins { id 'org.apache.beam.vendor-java' } description = "Apache Beam :: Vendored Dependencies :: gRPC :: 1.60.1" group = "org.apache.beam" -version = "0.1" +version = "0.2" vendorJava( dependencies: GrpcVendoring_1_60_1.dependencies(), diff --git a/website/Dockerfile b/website/Dockerfile index 0cb992422eac..e40724ea1811 100644 --- a/website/Dockerfile +++ b/website/Dockerfile @@ -44,15 +44,16 @@ RUN apt-get update \ && apt-get clean \ && rm -rf /var/lib/apt/lists/* -# Install node environment -RUN curl -sL https://deb.nodesource.com/setup_14.x | bash - \ +# Install node LTS environment +RUN curl -sL https://deb.nodesource.com/setup_lts.x | bash - \ && apt-get update \ && apt-get install -y --no-install-recommends \ nodejs \ - npm \ && apt-get autoremove -yqq --purge \ && apt-get clean \ && rm -rf /var/lib/apt/lists/* + +RUN npm update -g npm RUN npm install postcss postcss-cli autoprefixer @@ -74,3 +75,5 @@ RUN HUGOHOME="$(mktemp -d)" \ && chmod +x /usr/local/bin/hugo \ && rm -r "${HUGOHOME}" +# Prevent git errors of the form "fatal: detected dubious ownership in repository" +RUN git config --file=/.gitconfig --add safe.directory /opt && chmod a+r /.gitconfig diff --git a/website/www/site/content/en/blog/apache-beam-flink-and-kubernetes-part3.md 
b/website/www/site/content/en/blog/apache-beam-flink-and-kubernetes-part3.md new file mode 100644 index 000000000000..f172cd78a472 --- /dev/null +++ b/website/www/site/content/en/blog/apache-beam-flink-and-kubernetes-part3.md @@ -0,0 +1,209 @@ +--- +title: "Behind the Scenes: Crafting an Autoscaler for Apache Beam in a High-Volume Streaming Environment" +date: 2024-02-05 09:00:00 -0400 +categories: + - blog +authors: + - talat +--- + + + +### Introduction to the Design of Our Autoscaler for Apache Beam Jobs + +Welcome to the third and final part of our blog series on building a scalable, self-managed streaming infrastructure with Beam and Flink. [In our previous post](https://beam.apache.org/blog/apache-beam-flink-and-kubernetes/), we delved into the scale of our streaming platforms, highlighting our capacity to manage over 40,000 streaming jobs and process upwards of 10 million events per second. This impressive scale sets the stage for the challenge we address today: the intricate task of resource allocation in a dynamic streaming environment. + +In this blog post, [Talat Uyarer (Architect / Senior Principal Engineer)](https://www.linkedin.com/in/talatuyarer/) and [Rishabh Kedia (Principal Engineer)](https://www.linkedin.com/in/rishabhkedia/) describe our Autoscaler in more detail. Imagine a scenario where your streaming system is inundated with fluctuating workloads. Our case presents a unique challenge, as our customers, equipped with firewalls distributed globally, generate logs at various times of the day. This results in workloads that not only vary by time but also escalate over time due to changes in settings or the addition of new cybersecurity solutions from PANW. Furthermore, updates to our codebase necessitate rolling out changes across all streaming jobs, leading to a temporary surge in demand as the system processes unprocessed data. + + + +Traditionally, managing this ebb and flow of demand involves a manual, often inefficient approach. One might over-provision resources to handle peak loads, inevitably leading to resource wastage during off-peak hours. Conversely, a more cost-conscious strategy might involve accepting delays during peak times, with the expectation of catching up later. However, both methods demand constant monitoring and manual adjustment - a far from ideal situation. + +In this modern era, where automated scaling of web front-ends is a given, we aspire to bring the same level of efficiency and automation to streaming infrastructure. Our goal is to develop a system that can dynamically track and adjust to the workload demands of our streaming operations. In this blog post, we will introduce you to our innovative solution - an autoscaler designed specifically for Apache Beam jobs. + + + +For clarity, when we refer to "resources" in this context, we mean the number of Flink Task Managers, or Kubernetes Pods, that process your streaming pipeline. These Task Managers aren't just about CPU; they also involve RAM, Network, Disk IO, and other computational resources. + +However, our solution is predicated on certain assumptions. Primarily, it's geared towards operations processing substantial data volumes. If your workload only requires a couple of Task Managers, this system might not be the best fit. In our case we have 10K+ workloads, and each of them has a different load profile. Manual tuning was not an option for us. We also assume that the data is evenly distributed, allowing for increased throughput with the addition of more Task Managers.
This assumption is crucial for effective horizontal scaling. While there are real-world complexities that might challenge these assumptions, for the scope of this discussion, we will focus on scenarios where these conditions hold true. + +Join us as we delve into the design and functionality of our autoscaler, a solution tailored to bring efficiency, adaptability, and a touch of intelligence to the world of streaming infrastructure. + + +## Identifying the Right Signals for Autoscaling + +When we're overseeing a system like Apache Beam jobs on Flink, it's crucial to identify key signals that help us understand the relationship between our workload and resources. These signals are our guiding lights, showing us when we're lagging behind or wasting resources. By accurately identifying these signals, we can formulate effective scaling policies and implement changes in real time. Imagine needing to expand from 100 to 200 TaskManagers — how do we smoothly make that transition? That's where these signals come into play. + +Remember, we're aiming for a universal solution applicable to any workload and pipeline. While specific problems might benefit from unique signals, our focus here is on creating a one-size-fits-all approach. + +In Flink, tasks form the basic execution unit and consist of one or more operators, such as map, filter, or reduce. Flink optimizes performance by chaining these operators into single tasks when possible, minimizing overheads like thread context switching and network I/O. Your pipeline, when optimized, turns into a directed acyclic graph of stages, each processing elements based on your code. Don't confuse stages with physical machines — they're separate concepts. In our jobs, we measure backlog by using Apache Beam's [`backlog_bytes` and `backlog_elements`](https://github.com/apache/beam/blob/master/sdks/java/core/src/main/java/org/apache/beam/sdk/metrics/SourceMetrics.java#L32) metrics. + + + +##### **Upscaling Signals** + +##### *Backlog Growth* +Let’s take a practical example. Consider a pipeline reading from Kafka, where different operators handle data parsing, formatting, and accumulation. The key metric here is throughput — how much data each operator processes over time. But throughput alone isn't enough. We need to examine the queue size or backlog at each operator. A growing backlog indicates we're falling behind. We measure this as backlog growth — the first derivative of backlog size over time, highlighting our processing deficit. + + + +##### *Backlog Time* +This leads us to backlog time, a derived metric that compares backlog size with throughput. It’s a measure of how long it would take to clear the current backlog, assuming no new data arrives. This helps us identify whether a backlog of a certain size is acceptable or problematic, based on our specific processing needs and thresholds. + + + + + +#### **Downscaling: When Less is More** + +##### *CPU Utilization* +A key signal for downscaling is CPU utilization. Low CPU utilization suggests we're using more resources than necessary. By monitoring this, we can scale down efficiently without compromising performance. + +#### **Signals Summary** + +In summary, the signals we've identified for effective autoscaling are: + +1. **Throughput:** The baseline of our performance. +2. **Backlog Growth:** Indicates whether we’re keeping pace with incoming data. +3. **Backlog Time:** Helps us understand the severity of the backlog. +4. **CPU Utilization:** Guides us in resource optimization.
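+To make these signals concrete, here is a minimal Python sketch, with assumed names and a hypothetical metrics snapshot type, of how they can be derived from two consecutive samples of a job's metrics. It is an illustration of the definitions above, not the implementation we run in production:
+
+```python
+from dataclasses import dataclass
+
+
+@dataclass
+class MetricsSnapshot:
+    timestamp_s: float      # when the sample was taken (seconds)
+    backlog_bytes: float    # e.g. Beam's backlog_bytes source metric
+    processed_bytes: float  # total bytes processed so far
+    cpu_utilization: float  # average TaskManager CPU, 0.0 - 1.0
+
+
+def derive_signals(prev: MetricsSnapshot, curr: MetricsSnapshot):
+    """Derive throughput, backlog growth, backlog time, and CPU from two samples."""
+    elapsed = curr.timestamp_s - prev.timestamp_s
+    # Throughput: bytes processed per second over the sampling window.
+    throughput = (curr.processed_bytes - prev.processed_bytes) / elapsed
+    # Backlog growth: first derivative of backlog size over time.
+    backlog_growth = (curr.backlog_bytes - prev.backlog_bytes) / elapsed
+    # Backlog time: how long it would take to drain the current backlog
+    # at the current throughput, assuming no new data arrives.
+    backlog_time = curr.backlog_bytes / throughput if throughput > 0 else float("inf")
+    return throughput, backlog_growth, backlog_time, curr.cpu_utilization
+```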
+ +These signals might seem straightforward, but their simplicity is key to a scalable, workload-agnostic autoscaling solution. + +## Simplifying Autoscaling Policies for Apache Beam Jobs on Flink + +In the world of Apache Beam jobs running on Flink, deciding when to scale up or down is a bit like being a chef in a busy kitchen. You need to keep an eye on several ingredients — your workload, virtual machines (VMs), and how they interact. It's about maintaining a perfect balance. Our main goals? Avoid falling behind in processing (no backlog growth), ensure that any existing backlog is manageable (short backlog time), and use our resources (like CPU) efficiently. + +#### **Up-scaling: Keeping Up and Catching Up** + +Imagine your system is like a team of chefs working together. Here's how we decide when to bring more chefs into the kitchen (a.k.a. upscaling): + +1. **Keeping Up:** First, we look at our current team size (number of VMs) and how much they're processing (throughput). We then adjust our team size based on the amount of incoming orders (input rate). It's about ensuring that our team is big enough to handle the current demand. + +2. **Catching Up:** Sometimes, we might have a backlog of orders. In that case, we decide how many extra chefs we need to clear this backlog within a desired time (like 60 seconds). This part of the policy helps us get back on track swiftly. + +#### **Scaling Example: A Practical Look** + +Let's paint a picture with an example. Initially, we have a steady flow of orders (input rate) matching our processing capacity (throughput), so there's no backlog. But suddenly, orders increase, and our team starts falling behind, creating a backlog. We respond by increasing our team size to match the new rate of orders. Though the backlog doesn't grow further, it still exists. Finally, we add a few more chefs to the team, which allows us to clear the backlog quickly and return to a new, balanced state. + + +#### **Downscaling: When to Reduce Resources** + +Downscaling is like knowing when some chefs can take a break after a rush hour. We consider this when: + +- Our backlog is low — we've caught up with the orders. +- The backlog isn't growing — we're keeping up with incoming orders. +- Our kitchen (CPU) isn't working too hard — we're using our resources efficiently. + +Downscaling is all about reducing resources without affecting the quality of service. It's about ensuring that we're not overstaffed when the rush hour is over. + +#### **Summary: A Recipe for Effective Scaling** + +In summary, our scaling policy works as follows. To scale up, we first check that the time to drain the backlog is beyond its threshold (120s) or that CPU utilization is above its threshold (90%). + +Increasing backlog (Backlog Growth > 0): + + + +Consistent backlog (Backlog Growth = 0): + + + +To sum up: + + + +To scale down, we need to ensure that machine utilization is low (< 70%), there is no backlog growth, and the current time to drain the backlog is below the limit (10s). + +So the only driving factor in calculating the required resources after a scale-down is CPU utilization. + + + +## Executing Autoscaling Decision + +In our setup we use Reactive Mode, which relies on the Adaptive Scheduler and the Declarative Resource Manager. We wanted to align resources with slots. As advised in most of the Flink documentation, we set one slot per vCPU. Most of our jobs use a 1 vCPU / 4GB memory combination per TaskManager.
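+Before looking at how we execute these decisions with Reactive Mode, here is a minimal Python sketch of the policy summarized above. The thresholds (120s backlog time and 90% CPU for scale-up; 70% CPU, no backlog growth, and 10s backlog time for scale-down) and the 60-second catch-up target come from the text, while the function and parameter names, and the 70% target utilization used in the scale-down calculation, are assumptions for illustration rather than our production code:
+
+```python
+import math
+
+
+def desired_taskmanagers(current_workers: int,
+                         throughput: float,       # bytes/s the job currently processes
+                         input_rate: float,       # bytes/s arriving from the sources
+                         backlog_bytes: float,
+                         backlog_growth: float,   # bytes/s, first derivative of backlog
+                         backlog_time_s: float,
+                         cpu_utilization: float,  # 0.0 - 1.0, averaged over TaskManagers
+                         max_workers: int) -> int:
+    """Suggest a TaskManager count based on the up/down-scaling policy."""
+    per_worker_rate = throughput / max(current_workers, 1)
+    if per_worker_rate <= 0:
+        return current_workers  # no throughput signal yet; take no action
+
+    # Scale up when the backlog would take too long to drain or CPU is saturated.
+    if backlog_time_s > 120 or cpu_utilization > 0.90:
+        keep_up = input_rate / per_worker_rate             # sustain the incoming rate
+        catch_up = (backlog_bytes / 60) / per_worker_rate  # drain the backlog in ~60s
+        return min(max_workers, math.ceil(keep_up + catch_up))
+
+    # Scale down only when CPU is low, the backlog is not growing,
+    # and the remaining backlog drains almost immediately.
+    if cpu_utilization < 0.70 and backlog_growth <= 0 and backlog_time_s < 10:
+        # CPU is the only driver here: shrink toward the assumed 70% target utilization.
+        return max(1, math.ceil(current_workers * cpu_utilization / 0.70))
+
+    return current_workers  # otherwise, keep the current size
+```
+
+The count returned by such a policy is then applied to the job through the Flink Kubernetes Operator, as described in the rest of this section.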
+ +Reactive Mode, a unique feature of the Adaptive Scheduler, operates under the principle of one job per cluster, a rule enforced in Application Mode. In this mode, a job is configured to utilize all available resources within the cluster. Adding a TaskManager will increase the job's scale, while removing resources will decrease it. In this setup, Flink autonomously manages the job's parallelism, always maximizing it. + +During a rescaling event, Reactive Mode restarts the job using the most recent checkpoint. This eliminates the need for creating a savepoint, which is typically required for manual job rescaling. The volume of data reprocessed after rescaling is influenced by the checkpointing interval (10 seconds for us), and the time it takes to restore depends on the size of the state. + +The scheduler determines the parallelism of each operator within a job. This setting is not user-configurable, and any attempts to set it, whether for individual operators or the entire job, will be ignored. + + + +Parallelism can only be influenced by setting a maximum for pipelines, which the scheduler will honor. Our maxParallelism is limited by the total count of partitions that the pipeline will process, as well as by the job itself. We cap the maximum number of TaskManagers with a maxWorker count and control the job's key count in shuffles by setting maxParallelism. Additionally, we set maxParallelism per pipeline to manage pipeline parallelism. The job cannot exceed its maxParallelism in terms of workers. + +After the autoscaler analysis, we tag the job as needing to scale up, scale down, or take no action. To interact with the job, we use a library we have built on top of the Flink Kubernetes Operator. This library allows us to interact with our Flink jobs via a simple Java method call, and it converts the method call into a Kubernetes command. + +In Kubernetes terms, the call for a scale-up looks like this: + +`kubectl scale flinkdeployment job-name --replicas=100` + +Apache Flink will handle the rest of the work needed to scale up. + +## Maintaining State for Stateful Streaming Applications with Autoscaling + +Adapting Apache Flink's state recovery mechanisms for autoscaling involves leveraging its robust features like max parallelism, checkpointing, and the Adaptive Scheduler to ensure efficient and resilient stream processing, even as the system dynamically adjusts to varying loads. Here's how these components work together in an autoscaling context: + +1. **Max Parallelism** sets an upper limit on how much a job can scale out, ensuring that state can be redistributed across a larger or smaller number of nodes without exceeding predefined boundaries. This is crucial for autoscaling because it allows Flink to manage state effectively, even as the number of task slots changes to accommodate varying workloads. +2. **Checkpointing** is at the heart of Flink's fault tolerance mechanism, periodically saving the state of each job to durable storage (in our case, a GCS bucket). In an autoscaling scenario, checkpointing enables Flink to recover to a consistent state after scaling operations. When the system scales out (adds more resources) or scales in (removes resources), Flink can restore the state from these checkpoints, ensuring data integrity and processing continuity without losing critical information. During scale-down or scale-up, some data may be reprocessed from the last checkpoint; to reduce that amount, we lower the checkpointing interval to 10 seconds. +3.
**Reactive Mode** is a special mode of the Adaptive Scheduler that assumes a single job per cluster (enforced by Application Mode). Reactive Mode configures a job so that it always uses all resources available in the cluster. Adding a TaskManager will scale up your job; removing resources will scale it down. Flink will manage the parallelism of the job, always setting it to the highest possible value. When a job undergoes resizing, Reactive Mode triggers a restart using the most recent successful checkpoint. + +## Conclusion + +In this blog series, we've taken a deep dive into the creation of an autoscaler for Apache Beam in a high-volume streaming environment, highlighting the journey from conceptualization to implementation. This endeavor not only tackled the complexities of dynamic resource allocation but also set a new standard for efficiency and adaptability in streaming infrastructure. By marrying intelligent scaling policies with the robust capabilities of Apache Beam and Flink, we've showcased a scalable solution that optimizes resource use and maintains performance under varying loads. This project stands as a testament to the power of teamwork, innovation, and a forward-thinking approach to streaming data processing. As we wrap up this series, we express our gratitude to all contributors and look forward to the continuous evolution of this technology, inviting the community to join us in further discussions and developments. + +# References + +[1] Streaming Auto-scaling in Google Cloud Dataflow [https://www.infoq.com/presentations/google-cloud-dataflow/](https://www.infoq.com/presentations/google-cloud-dataflow/) + +[2] Pipeline lifecycle [https://cloud.google.com/dataflow/docs/pipeline-lifecycle](https://cloud.google.com/dataflow/docs/pipeline-lifecycle) + +[3] Flink Elastic Scaling [https://nightlies.apache.org/flink/flink-docs-master/docs/deployment/elastic_scaling/](https://nightlies.apache.org/flink/flink-docs-master/docs/deployment/elastic_scaling/) + +# Acknowledgements + +This was a large effort to build the new infrastructure and to migrate large, customer-based applications from cloud-provider-managed streaming infrastructure to self-managed, Flink-based infrastructure at scale. Thanks to the Palo Alto Networks CDL streaming team who helped make this happen: Kishore Pola, Andrew Park, Hemant Kumar, Manan Mangal, Helen Jiang, Mandy Wang, Praveen Kumar Pasupuleti, JM Teo, Rishabh Kedia, Talat Uyarer, Naitik Dani, and David He. + +--- + +**Explore More:** + +- [Part 1: Introduction to Building and Managing Apache Beam Flink Services on Kubernetes](https://beam.apache.org/blog/apache-beam-flink-and-kubernetes/) +- [Part 2: Build a scalable, self-managed streaming infrastructure with Flink: Tackling Autoscaling Challenges - Part 2](https://beam.apache.org/blog/apache-beam-flink-and-kubernetes-part2/) + +*Join the conversation and share your experiences on our [Community](https://beam.apache.org/community/) or contribute to our ongoing projects on [GitHub](https://github.com/apache/beam). Your feedback is invaluable.
If you have any comments or questions about this series, please feel free to reach out to us via the [User Mailing List](https://beam.apache.org/community/contact-us/)* + +*Stay connected with us for more updates and insights into Apache Beam, Flink, and Kubernetes.* \ No newline at end of file diff --git a/website/www/site/content/en/contribute/runner-guide.md b/website/www/site/content/en/contribute/runner-guide.md index cb3b52048606..c2349ccea32c 100644 --- a/website/www/site/content/en/contribute/runner-guide.md +++ b/website/www/site/content/en/contribute/runner-guide.md @@ -341,7 +341,7 @@ For values in the global window, you may want to use an even further compressed representation that doesn't bother including the window at all. We provide coders with these optimizations such as -(`PARAM_WINDOWED_VALUE`)[https://github.com/apache/beam/blob/release-2.49.0/model/pipeline/src/main/proto/org/apache/beam/model/pipeline/v1/beam_runner_api.proto#L968] +[`PARAM_WINDOWED_VALUE`](https://github.com/apache/beam/blob/release-2.49.0/model/pipeline/src/main/proto/org/apache/beam/model/pipeline/v1/beam_runner_api.proto#L968) that can be used to reduce the size of serialized data. In the future, this primitive may be retired as it can be implemented as a @@ -375,8 +375,8 @@ combiner lifting, where a new operation is placed before the `GroupByKey` that does partial (within-bundle) combining, which often requires a slight modification of what comes after the `GroupByKey` as well. An example of this transformation can be found in the -(Python)[https://github.com/apache/beam/blob/release-2.49.0/sdks/python/apache_beam/runners/portability/fn_api_runner/translations.py#L1193] -or (go)[https://github.com/apache/beam/blob/release-2.49.0/sdks/go/pkg/beam/runners/prism/internal/handlecombine.go#L67] +[Python](https://github.com/apache/beam/blob/release-2.49.0/sdks/python/apache_beam/runners/portability/fn_api_runner/translations.py#L1193) +or [go](https://github.com/apache/beam/blob/release-2.49.0/sdks/go/pkg/beam/runners/prism/internal/handlecombine.go#L67) implementations of this optimization. The resulting pre- and post-`GroupByKey` operations are generally fused in with the `ParDo`s and executed as above. @@ -385,8 +385,8 @@ the `ParDo`s and executed as above. When you receive a pipeline from a user, you will need to translate it. An explanation of how Beam pipelines are represented can be found -(here)[https://docs.google.com/presentation/d/1atu-QC_mnK2SaeLhc0D78wZYgVOX1fN0H544QmBi3VA] -which compliment the (official proto declarations)[https://github.com/apache/beam/blob/master/model/pipeline/src/main/proto/org/apache/beam/model/pipeline/v1/beam_runner_api.proto]. +[here](https://docs.google.com/presentation/d/1atu-QC_mnK2SaeLhc0D78wZYgVOX1fN0H544QmBi3VA) +which complements the [official proto declarations](https://github.com/apache/beam/blob/master/model/pipeline/src/main/proto/org/apache/beam/model/pipeline/v1/beam_runner_api.proto). ## Testing your runner @@ -757,7 +757,7 @@ and PipelineResult, but altered to be the minimal backend channel, versus a rich and convenient API.
A key piece of this is the -(Artifacts API)[https://github.com/apache/beam/blob/master/model/job-management/src/main/proto/org/apache/beam/model/job_management/v1/beam_artifact_api.proto], +[Artifacts API](https://github.com/apache/beam/blob/master/model/job-management/src/main/proto/org/apache/beam/model/job_management/v1/beam_artifact_api.proto), which allows a Runner to fetch and deploy binary artifacts (such as jars, pypi packages, etc.) that are listed as dependencies in the various environments, and may have various representations. This is invoked after a pipeline diff --git a/website/www/site/content/en/documentation/dsls/sql/zetasql/overview.md b/website/www/site/content/en/documentation/dsls/sql/zetasql/overview.md index 03ab6e79177d..b68324a53677 100644 --- a/website/www/site/content/en/documentation/dsls/sql/zetasql/overview.md +++ b/website/www/site/content/en/documentation/dsls/sql/zetasql/overview.md @@ -16,7 +16,7 @@ See the License for the specific language governing permissions and limitations under the License. --> # Beam ZetaSQL overview -Beam SQL supports a varient of the [ZetaSQL](https://github.com/google/zetasql) language. ZetaSQL is similar to the language in BigQuery's SQL framework. This Beam SQL dialect is especially useful in pipelines that [write to or read from BigQuery tables](https://beam.apache.org/releases/javadoc/current/org/apache/beam/sdk/io/gcp/bigquery/BigQueryIO.html). +Beam SQL supports a variant of the [ZetaSQL](https://github.com/google/zetasql) language. ZetaSQL is similar to the language in BigQuery's SQL framework. This Beam SQL dialect is especially useful in pipelines that [write to or read from BigQuery tables](https://beam.apache.org/releases/javadoc/current/org/apache/beam/sdk/io/gcp/bigquery/BigQueryIO.html). Beam SQL has additional extensions leveraging Beam’s unified batch/streaming model and processing complex data types. You can use these extensions with all Beam SQL dialects, including Beam ZetaSQL. @@ -30,4 +30,4 @@ A Beam SQL statement comprises a series of tokens. For more information about to Beam SQL supports standard SQL scalar data types as well as extensions including arrays, maps, and nested rows. For more information about scalar data in Beam ZetaSQL, see the [Data types](/documentation/dsls/sql/zetasql/data-types) reference. ## Functions and operators -For a list of the builtin functions and operators supported in Beam ZetaSQL, see [SupportedZetaSqlBuiltinFunctions.java](https://github.com/apache/beam/blob/master/sdks/java/extensions/sql/zetasql/src/main/java/org/apache/beam/sdk/extensions/sql/zetasql/SupportedZetaSqlBuiltinFunctions.java) (commented-out entries are not yet supported). For documentation on how these functions work, see the [ZetaSQL functions and operators](https://github.com/google/zetasql/blob/master/docs/functions-and-operators.md) reference. \ No newline at end of file +For a list of the built-in functions and operators supported in Beam ZetaSQL, see [SupportedZetaSqlBuiltinFunctions.java](https://github.com/apache/beam/blob/master/sdks/java/extensions/sql/zetasql/src/main/java/org/apache/beam/sdk/extensions/sql/zetasql/SupportedZetaSqlBuiltinFunctions.java) (commented-out entries are not yet supported). For documentation on how these functions work, see the [ZetaSQL functions and operators](https://github.com/google/zetasql/blob/master/docs/functions-and-operators.md) reference. 
diff --git a/website/www/site/content/en/documentation/programming-guide.md b/website/www/site/content/en/documentation/programming-guide.md index 47808063254f..d3333c1f9f64 100644 --- a/website/www/site/content/en/documentation/programming-guide.md +++ b/website/www/site/content/en/documentation/programming-guide.md @@ -8165,7 +8165,7 @@ Beam users also have the option to [manually start](/documentation/programming-g A Beam Transform service instance can be manually started by using utilities provided with Apache Beam SDKs. {{< highlight java >}} -java -jar beam-sdks-java-transform-service-launcher-.jar --port --beam_version --project_name --command up +java -jar beam-sdks-java-transform-service-app-.jar --port --beam_version --project_name --command up {{< /highlight >}} {{< highlight py >}} @@ -8179,7 +8179,7 @@ This feature is currently in development. To stop the transform service, use the following commands. {{< highlight java >}} -java -jar beam-sdks-java-transform-service-launcher-.jar --port --beam_version --project_name --command down +java -jar beam-sdks-java-transform-service-app-.jar --port --beam_version --project_name --command down {{< /highlight >}} {{< highlight py >}} diff --git a/website/www/site/static/images/blog/apache-beam-flink-and-kubernetes-part3/adaptive_scheduler_rescale.png b/website/www/site/static/images/blog/apache-beam-flink-and-kubernetes-part3/adaptive_scheduler_rescale.png new file mode 100644 index 000000000000..4515a9a2be8f Binary files /dev/null and b/website/www/site/static/images/blog/apache-beam-flink-and-kubernetes-part3/adaptive_scheduler_rescale.png differ diff --git a/website/www/site/static/images/blog/apache-beam-flink-and-kubernetes-part3/auto-tuned-worker.png b/website/www/site/static/images/blog/apache-beam-flink-and-kubernetes-part3/auto-tuned-worker.png new file mode 100644 index 000000000000..a0aa8baadfe4 Binary files /dev/null and b/website/www/site/static/images/blog/apache-beam-flink-and-kubernetes-part3/auto-tuned-worker.png differ diff --git a/website/www/site/static/images/blog/apache-beam-flink-and-kubernetes-part3/backlog_growth.png b/website/www/site/static/images/blog/apache-beam-flink-and-kubernetes-part3/backlog_growth.png new file mode 100644 index 000000000000..3ea77168f7c2 Binary files /dev/null and b/website/www/site/static/images/blog/apache-beam-flink-and-kubernetes-part3/backlog_growth.png differ diff --git a/website/www/site/static/images/blog/apache-beam-flink-and-kubernetes-part3/backlog_time.png b/website/www/site/static/images/blog/apache-beam-flink-and-kubernetes-part3/backlog_time.png new file mode 100644 index 000000000000..5b9080a70549 Binary files /dev/null and b/website/www/site/static/images/blog/apache-beam-flink-and-kubernetes-part3/backlog_time.png differ diff --git a/website/www/site/static/images/blog/apache-beam-flink-and-kubernetes-part3/cpurate_desired.png b/website/www/site/static/images/blog/apache-beam-flink-and-kubernetes-part3/cpurate_desired.png new file mode 100644 index 000000000000..d9c7d3b08d49 Binary files /dev/null and b/website/www/site/static/images/blog/apache-beam-flink-and-kubernetes-part3/cpurate_desired.png differ diff --git a/website/www/site/static/images/blog/apache-beam-flink-and-kubernetes-part3/flink-operator-chaining.png b/website/www/site/static/images/blog/apache-beam-flink-and-kubernetes-part3/flink-operator-chaining.png new file mode 100644 index 000000000000..283394194443 Binary files /dev/null and 
b/website/www/site/static/images/blog/apache-beam-flink-and-kubernetes-part3/flink-operator-chaining.png differ diff --git a/website/www/site/static/images/blog/apache-beam-flink-and-kubernetes-part3/operator-backlog.png b/website/www/site/static/images/blog/apache-beam-flink-and-kubernetes-part3/operator-backlog.png new file mode 100644 index 000000000000..7248fbb39394 Binary files /dev/null and b/website/www/site/static/images/blog/apache-beam-flink-and-kubernetes-part3/operator-backlog.png differ diff --git a/website/www/site/static/images/blog/apache-beam-flink-and-kubernetes-part3/resource-allocation.png b/website/www/site/static/images/blog/apache-beam-flink-and-kubernetes-part3/resource-allocation.png new file mode 100644 index 000000000000..0b23eec32fe4 Binary files /dev/null and b/website/www/site/static/images/blog/apache-beam-flink-and-kubernetes-part3/resource-allocation.png differ diff --git a/website/www/site/static/images/blog/apache-beam-flink-and-kubernetes-part3/worker_extra.png b/website/www/site/static/images/blog/apache-beam-flink-and-kubernetes-part3/worker_extra.png new file mode 100644 index 000000000000..427c436ed20f Binary files /dev/null and b/website/www/site/static/images/blog/apache-beam-flink-and-kubernetes-part3/worker_extra.png differ diff --git a/website/www/site/static/images/blog/apache-beam-flink-and-kubernetes-part3/worker_require.png b/website/www/site/static/images/blog/apache-beam-flink-and-kubernetes-part3/worker_require.png new file mode 100644 index 000000000000..534fe21346fe Binary files /dev/null and b/website/www/site/static/images/blog/apache-beam-flink-and-kubernetes-part3/worker_require.png differ diff --git a/website/www/site/static/images/blog/apache-beam-flink-and-kubernetes-part3/worker_scaleup.png b/website/www/site/static/images/blog/apache-beam-flink-and-kubernetes-part3/worker_scaleup.png new file mode 100644 index 000000000000..c3015f28f0d6 Binary files /dev/null and b/website/www/site/static/images/blog/apache-beam-flink-and-kubernetes-part3/worker_scaleup.png differ