From e8e38145d96a7b0561a418c19519917b0567591e Mon Sep 17 00:00:00 2001 From: Yi Hu Date: Wed, 11 Oct 2023 21:16:13 -0400 Subject: [PATCH 01/13] Setup Java 21 container (#28833) * Add Java21 container * fix spotless * Update website * Fix jamm * align configs in recent change --- ...a_CoGBK_Dataflow_V2_Batch_JavaVersions.yml | 8 ++-- ...GBK_Dataflow_V2_Streaming_JavaVersions.yml | 8 ++-- ...ests_Java_GBK_Dataflow_V2_Batch_Java11.yml | 14 +++---- ...ests_Java_GBK_Dataflow_V2_Batch_Java17.yml | 14 +++---- ..._Java_GBK_Dataflow_V2_Streaming_Java11.yml | 14 +++---- ..._Java_GBK_Dataflow_V2_Streaming_Java17.yml | 14 +++---- ..._PostCommit_Java_Examples_Dataflow_ARM.yml | 2 +- ...tCommit_Java_Examples_Dataflow_V2_Java.yml | 2 +- ...m_PostCommit_Java_Jpms_Dataflow_Java17.yml | 2 +- ...eam_PostCommit_Java_Jpms_Direct_Java17.yml | 2 +- ...stCommit_Java_Nexmark_Dataflow_V2_Java.yml | 2 +- ...eam_PostCommit_TransformService_Direct.yml | 2 +- ...reCommit_Java_Examples_Dataflow_Java11.yml | 2 +- ...reCommit_Java_Examples_Dataflow_Java17.yml | 2 +- .../workflows/beam_PreCommit_SQL_Java11.yml | 2 +- .../workflows/beam_PreCommit_SQL_Java17.yml | 2 +- .test-infra/jenkins/JavaTestProperties.groovy | 7 +++- .test-infra/jenkins/NexmarkBuilder.groovy | 4 +- ..._LoadTests_CoGBK_Dataflow_V2_Java11.groovy | 2 +- ..._LoadTests_CoGBK_Dataflow_V2_Java17.groovy | 2 +- ...ob_LoadTests_GBK_Dataflow_V2_Java11.groovy | 2 +- ...ob_LoadTests_GBK_Dataflow_V2_Java17.groovy | 2 +- ..._LoadTests_ParDo_Dataflow_V2_Java11.groovy | 2 +- ..._LoadTests_ParDo_Dataflow_V2_Java17.groovy | 2 +- ...it_Java_Examples_Dataflow_V2_Java11.groovy | 2 +- ...it_Java_Examples_Dataflow_V2_Java17.groovy | 2 +- ...ostCommit_Java_Jpms_Dataflow_Java17.groovy | 2 +- ..._PostCommit_Java_Jpms_Direct_Java17.groovy | 2 +- ..._PostCommit_TransformService_Direct.groovy | 2 +- ...ommit_Java_Examples_Dataflow_Java11.groovy | 2 +- .../jenkins/job_PreCommit_SQL_Java11.groovy | 2 +- .../jenkins/job_PreCommit_SQL_Java17.groovy | 2 +- ...ommit_Java_Examples_Dataflow_Java17.groovy | 2 +- build.gradle.kts | 14 +++---- .../beam/gradle/BeamModulePlugin.groovy | 37 ++++++++++++++++++- .../core/construction/Environments.java | 6 ++- .../core/construction/EnvironmentsTest.java | 6 ++- .../arm/build.gradle | 6 +-- .../google-cloud-dataflow-java/build.gradle | 6 +-- .../beam/runners/dataflow/DataflowRunner.java | 7 ++-- runners/spark/spark_runner.gradle | 5 ++- sdks/java/container/Dockerfile | 3 +- sdks/java/container/common.gradle | 14 ++++--- sdks/java/container/java21/build.gradle | 30 +++++++++++++++ sdks/java/container/java21/option-jamm.json | 12 ++++++ sdks/java/testing/jpms-tests/build.gradle | 21 +++++------ sdks/java/testing/test-utils/build.gradle | 29 +++++---------- .../jvmverification/JvmVerification.java | 16 +++++++- settings.gradle.kts | 1 + .../www/site/content/en/roadmap/java-sdk.md | 6 +-- 50 files changed, 220 insertions(+), 132 deletions(-) create mode 100644 sdks/java/container/java21/build.gradle create mode 100644 sdks/java/container/java21/option-jamm.json diff --git a/.github/workflows/beam_LoadTests_Java_CoGBK_Dataflow_V2_Batch_JavaVersions.yml b/.github/workflows/beam_LoadTests_Java_CoGBK_Dataflow_V2_Batch_JavaVersions.yml index 35575aec0bef..ff172cc338b7 100644 --- a/.github/workflows/beam_LoadTests_Java_CoGBK_Dataflow_V2_Batch_JavaVersions.yml +++ b/.github/workflows/beam_LoadTests_Java_CoGBK_Dataflow_V2_Batch_JavaVersions.yml @@ -95,7 +95,7 @@ jobs: with: gradle-command: :sdks:java:testing:load-tests:run arguments: | - 
-PcompileAndRunTestsWithJava${{ matrix.java_version }} \ + -PtestJavaVersion=${{ matrix.java_version }} \ -Pjava${{ matrix.java_version }}Home=$JAVA_HOME_${{ matrix.java_version }}_X64 \ -PloadTest.mainClass=org.apache.beam.sdk.loadtests.CoGroupByKeyLoadTest \ -Prunner=:runners:google-cloud-dataflow-java \ @@ -105,7 +105,7 @@ jobs: with: gradle-command: :sdks:java:testing:load-tests:run arguments: | - -PcompileAndRunTestsWithJava${{ matrix.java_version }} \ + -PtestJavaVersion=${{ matrix.java_version }} \ -Pjava${{ matrix.java_version }}Home=$JAVA_HOME_${{ matrix.java_version }}_X64 \ -PloadTest.mainClass=org.apache.beam.sdk.loadtests.CoGroupByKeyLoadTest \ -Prunner=:runners:google-cloud-dataflow-java \ @@ -115,7 +115,7 @@ jobs: with: gradle-command: :sdks:java:testing:load-tests:run arguments: | - -PcompileAndRunTestsWithJava${{ matrix.java_version }} \ + -PtestJavaVersion=${{ matrix.java_version }} \ -Pjava${{ matrix.java_version }}Home=$JAVA_HOME_${{ matrix.java_version }}_X64 \ -PloadTest.mainClass=org.apache.beam.sdk.loadtests.CoGroupByKeyLoadTest \ -Prunner=:runners:google-cloud-dataflow-java \ @@ -125,7 +125,7 @@ jobs: with: gradle-command: :sdks:java:testing:load-tests:run arguments: | - -PcompileAndRunTestsWithJava${{ matrix.java_version }} \ + -PtestJavaVersion=${{ matrix.java_version }} \ -Pjava${{ matrix.java_version }}Home=$JAVA_HOME_${{ matrix.java_version }}_X64 \ -PloadTest.mainClass=org.apache.beam.sdk.loadtests.CoGroupByKeyLoadTest \ -Prunner=:runners:google-cloud-dataflow-java \ diff --git a/.github/workflows/beam_LoadTests_Java_CoGBK_Dataflow_V2_Streaming_JavaVersions.yml b/.github/workflows/beam_LoadTests_Java_CoGBK_Dataflow_V2_Streaming_JavaVersions.yml index 46dfaa986920..efa61373f2ab 100644 --- a/.github/workflows/beam_LoadTests_Java_CoGBK_Dataflow_V2_Streaming_JavaVersions.yml +++ b/.github/workflows/beam_LoadTests_Java_CoGBK_Dataflow_V2_Streaming_JavaVersions.yml @@ -95,7 +95,7 @@ jobs: with: gradle-command: :sdks:java:testing:load-tests:run arguments: | - -PcompileAndRunTestsWithJava${{ matrix.java_version }} \ + -PtestJavaVersion=${{ matrix.java_version }} \ -Pjava${{ matrix.java_version }}Home=$JAVA_HOME_${{ matrix.java_version }}_X64 \ -PloadTest.mainClass=org.apache.beam.sdk.loadtests.CoGroupByKeyLoadTest \ -Prunner=:runners:google-cloud-dataflow-java \ @@ -105,7 +105,7 @@ jobs: with: gradle-command: :sdks:java:testing:load-tests:run arguments: | - -PcompileAndRunTestsWithJava${{ matrix.java_version }} \ + -PtestJavaVersion=${{ matrix.java_version }} \ -Pjava${{ matrix.java_version }}Home=$JAVA_HOME_${{ matrix.java_version }}_X64 \ -PloadTest.mainClass=org.apache.beam.sdk.loadtests.CoGroupByKeyLoadTest \ -Prunner=:runners:google-cloud-dataflow-java \ @@ -115,7 +115,7 @@ jobs: with: gradle-command: :sdks:java:testing:load-tests:run arguments: | - -PcompileAndRunTestsWithJava${{ matrix.java_version }} \ + -PtestJavaVersion=${{ matrix.java_version }} \ -Pjava${{ matrix.java_version }}Home=$JAVA_HOME_${{ matrix.java_version }}_X64 \ -PloadTest.mainClass=org.apache.beam.sdk.loadtests.CoGroupByKeyLoadTest \ -Prunner=:runners:google-cloud-dataflow-java \ @@ -125,7 +125,7 @@ jobs: with: gradle-command: :sdks:java:testing:load-tests:run arguments: | - -PcompileAndRunTestsWithJava${{ matrix.java_version }} \ + -PtestJavaVersion=${{ matrix.java_version }} \ -Pjava${{ matrix.java_version }}Home=$JAVA_HOME_${{ matrix.java_version }}_X64 \ -PloadTest.mainClass=org.apache.beam.sdk.loadtests.CoGroupByKeyLoadTest \ -Prunner=:runners:google-cloud-dataflow-java \ diff --git 
a/.github/workflows/beam_LoadTests_Java_GBK_Dataflow_V2_Batch_Java11.yml b/.github/workflows/beam_LoadTests_Java_GBK_Dataflow_V2_Batch_Java11.yml index 318f157e57a6..5c333013e2a2 100644 --- a/.github/workflows/beam_LoadTests_Java_GBK_Dataflow_V2_Batch_Java11.yml +++ b/.github/workflows/beam_LoadTests_Java_GBK_Dataflow_V2_Batch_Java11.yml @@ -93,7 +93,7 @@ jobs: -PloadTest.mainClass=org.apache.beam.sdk.loadtests.GroupByKeyLoadTest \ -Prunner=:runners:google-cloud-dataflow-java \ -Prunner.version=V2 \ - -PcompileAndRunTestsWithJava11 \ + -PtestJavaVersion=11 \ -Pjava11Home=$JAVA_HOME_11_X64 \ '-PloadTest.args=${{ env.beam_LoadTests_Java_GBK_Dataflow_V2_Batch_Java11_test_arguments_1 }}' \ - name: run Load test 2GB of 100B records @@ -104,7 +104,7 @@ jobs: -PloadTest.mainClass=org.apache.beam.sdk.loadtests.GroupByKeyLoadTest \ -Prunner=:runners:google-cloud-dataflow-java \ -Prunner.version=V2 \ - -PcompileAndRunTestsWithJava11 \ + -PtestJavaVersion=11 \ -Pjava11Home=$JAVA_HOME_11_X64 \ '-PloadTest.args=${{ env.beam_LoadTests_Java_GBK_Dataflow_V2_Batch_Java11_test_arguments_2 }}' \ - name: run Load test 2GB of 100kB records @@ -115,7 +115,7 @@ jobs: -PloadTest.mainClass=org.apache.beam.sdk.loadtests.GroupByKeyLoadTest \ -Prunner=:runners:google-cloud-dataflow-java \ -Prunner.version=V2 \ - -PcompileAndRunTestsWithJava11 \ + -PtestJavaVersion=11 \ -Pjava11Home=$JAVA_HOME_11_X64 \ '-PloadTest.args=${{ env.beam_LoadTests_Java_GBK_Dataflow_V2_Batch_Java11_test_arguments_3 }}' \ - name: run Load test fanout 4 times with 2GB 10-byte records total @@ -126,7 +126,7 @@ jobs: -PloadTest.mainClass=org.apache.beam.sdk.loadtests.GroupByKeyLoadTest \ -Prunner=:runners:google-cloud-dataflow-java \ -Prunner.version=V2 \ - -PcompileAndRunTestsWithJava11 \ + -PtestJavaVersion=11 \ -Pjava11Home=$JAVA_HOME_11_X64 \ '-PloadTest.args=${{ env.beam_LoadTests_Java_GBK_Dataflow_V2_Batch_Java11_test_arguments_4 }}' \ - name: run Load test fanout 8 times with 2GB 10-byte records total @@ -137,7 +137,7 @@ jobs: -PloadTest.mainClass=org.apache.beam.sdk.loadtests.GroupByKeyLoadTest \ -Prunner=:runners:google-cloud-dataflow-java \ -Prunner.version=V2 \ - -PcompileAndRunTestsWithJava11 \ + -PtestJavaVersion=11 \ -Pjava11Home=$JAVA_HOME_11_X64 \ '-PloadTest.args=${{ env.beam_LoadTests_Java_GBK_Dataflow_V2_Batch_Java11_test_arguments_5 }}' \ - name: run Load test reiterate 4 times 10kB values @@ -148,7 +148,7 @@ jobs: -PloadTest.mainClass=org.apache.beam.sdk.loadtests.GroupByKeyLoadTest \ -Prunner=:runners:google-cloud-dataflow-java \ -Prunner.version=V2 \ - -PcompileAndRunTestsWithJava11 \ + -PtestJavaVersion=11 \ -Pjava11Home=$JAVA_HOME_11_X64 \ '-PloadTest.args=${{ env.beam_LoadTests_Java_GBK_Dataflow_V2_Batch_Java11_test_arguments_6 }}' \ - name: run Load test reiterate 4 times 2MB values @@ -159,6 +159,6 @@ jobs: -PloadTest.mainClass=org.apache.beam.sdk.loadtests.GroupByKeyLoadTest \ -Prunner=:runners:google-cloud-dataflow-java \ -Prunner.version=V2 \ - -PcompileAndRunTestsWithJava11 \ + -PtestJavaVersion=11 \ -Pjava11Home=$JAVA_HOME_11_X64 \ '-PloadTest.args=${{ env.beam_LoadTests_Java_GBK_Dataflow_V2_Batch_Java11_test_arguments_7 }}' \ No newline at end of file diff --git a/.github/workflows/beam_LoadTests_Java_GBK_Dataflow_V2_Batch_Java17.yml b/.github/workflows/beam_LoadTests_Java_GBK_Dataflow_V2_Batch_Java17.yml index 543cb86985d8..4fa5c364728b 100644 --- a/.github/workflows/beam_LoadTests_Java_GBK_Dataflow_V2_Batch_Java17.yml +++ b/.github/workflows/beam_LoadTests_Java_GBK_Dataflow_V2_Batch_Java17.yml @@ -95,7 +95,7 
@@ jobs: -PloadTest.mainClass=org.apache.beam.sdk.loadtests.GroupByKeyLoadTest \ -Prunner=:runners:google-cloud-dataflow-java \ -Prunner.version=V2 \ - -PcompileAndRunTestsWithJava17 \ + -PtestJavaVersion=17 \ -Pjava17Home=$JAVA_HOME_17_X64 \ '-PloadTest.args=${{ env.beam_LoadTests_Java_GBK_Dataflow_V2_Batch_Java17_test_arguments_1 }}' \ - name: run Load test 2GB of 100B records @@ -106,7 +106,7 @@ jobs: -PloadTest.mainClass=org.apache.beam.sdk.loadtests.GroupByKeyLoadTest \ -Prunner=:runners:google-cloud-dataflow-java \ -Prunner.version=V2 \ - -PcompileAndRunTestsWithJava17 \ + -PtestJavaVersion=17 \ -Pjava17Home=$JAVA_HOME_17_X64 \ '-PloadTest.args=${{ env.beam_LoadTests_Java_GBK_Dataflow_V2_Batch_Java17_test_arguments_2 }}' \ - name: run Load test 2GB of 100kB records @@ -117,7 +117,7 @@ jobs: -PloadTest.mainClass=org.apache.beam.sdk.loadtests.GroupByKeyLoadTest \ -Prunner=:runners:google-cloud-dataflow-java \ -Prunner.version=V2 \ - -PcompileAndRunTestsWithJava17 \ + -PtestJavaVersion=17 \ -Pjava17Home=$JAVA_HOME_17_X64 \ '-PloadTest.args=${{ env.beam_LoadTests_Java_GBK_Dataflow_V2_Batch_Java17_test_arguments_3 }}' \ - name: run Load test fanout 4 times with 2GB 10-byte records total @@ -128,7 +128,7 @@ jobs: -PloadTest.mainClass=org.apache.beam.sdk.loadtests.GroupByKeyLoadTest \ -Prunner=:runners:google-cloud-dataflow-java \ -Prunner.version=V2 \ - -PcompileAndRunTestsWithJava17 \ + -PtestJavaVersion=17 \ -Pjava17Home=$JAVA_HOME_17_X64 \ '-PloadTest.args=${{ env.beam_LoadTests_Java_GBK_Dataflow_V2_Batch_Java17_test_arguments_4 }}' \ - name: run Load test fanout 8 times with 2GB 10-byte records total @@ -139,7 +139,7 @@ jobs: -PloadTest.mainClass=org.apache.beam.sdk.loadtests.GroupByKeyLoadTest \ -Prunner=:runners:google-cloud-dataflow-java \ -Prunner.version=V2 \ - -PcompileAndRunTestsWithJava17 \ + -PtestJavaVersion=17 \ -Pjava17Home=$JAVA_HOME_17_X64 \ '-PloadTest.args=${{ env.beam_LoadTests_Java_GBK_Dataflow_V2_Batch_Java17_test_arguments_5 }}' \ - name: run Load test reiterate 4 times 10kB values @@ -150,7 +150,7 @@ jobs: -PloadTest.mainClass=org.apache.beam.sdk.loadtests.GroupByKeyLoadTest \ -Prunner=:runners:google-cloud-dataflow-java \ -Prunner.version=V2 \ - -PcompileAndRunTestsWithJava17 \ + -PtestJavaVersion=17 \ -Pjava17Home=$JAVA_HOME_17_X64 \ '-PloadTest.args=${{ env.beam_LoadTests_Java_GBK_Dataflow_V2_Batch_Java17_test_arguments_6 }}' \ - name: run Load test reiterate 4 times 2MB values @@ -161,6 +161,6 @@ jobs: -PloadTest.mainClass=org.apache.beam.sdk.loadtests.GroupByKeyLoadTest \ -Prunner=:runners:google-cloud-dataflow-java \ -Prunner.version=V2 \ - -PcompileAndRunTestsWithJava17 \ + -PtestJavaVersion=17 \ -Pjava17Home=$JAVA_HOME_17_X64 \ '-PloadTest.args=${{ env.beam_LoadTests_Java_GBK_Dataflow_V2_Batch_Java17_test_arguments_7 }}' \ No newline at end of file diff --git a/.github/workflows/beam_LoadTests_Java_GBK_Dataflow_V2_Streaming_Java11.yml b/.github/workflows/beam_LoadTests_Java_GBK_Dataflow_V2_Streaming_Java11.yml index fcd55f761c04..6432a315d8a1 100644 --- a/.github/workflows/beam_LoadTests_Java_GBK_Dataflow_V2_Streaming_Java11.yml +++ b/.github/workflows/beam_LoadTests_Java_GBK_Dataflow_V2_Streaming_Java11.yml @@ -93,7 +93,7 @@ jobs: -PloadTest.mainClass=org.apache.beam.sdk.loadtests.GroupByKeyLoadTest \ -Prunner=:runners:google-cloud-dataflow-java \ -Prunner.version=V2 \ - -PcompileAndRunTestsWithJava11 \ + -PtestJavaVersion=11 \ -Pjava11Home=$JAVA_HOME_11_X64 \ '-PloadTest.args=${{ env.beam_LoadTests_Java_GBK_Dataflow_V2_Streaming_Java11_test_arguments_1 
}}' \ - name: run Load test 2GB of 100B records @@ -104,7 +104,7 @@ jobs: -PloadTest.mainClass=org.apache.beam.sdk.loadtests.GroupByKeyLoadTest \ -Prunner=:runners:google-cloud-dataflow-java \ -Prunner.version=V2 \ - -PcompileAndRunTestsWithJava11 \ + -PtestJavaVersion=11 \ -Pjava11Home=$JAVA_HOME_11_X64 \ '-PloadTest.args=${{ env.beam_LoadTests_Java_GBK_Dataflow_V2_Streaming_Java11_test_arguments_2 }}' \ - name: run Load test 2GB of 100kB records @@ -115,7 +115,7 @@ jobs: -PloadTest.mainClass=org.apache.beam.sdk.loadtests.GroupByKeyLoadTest \ -Prunner=:runners:google-cloud-dataflow-java \ -Prunner.version=V2 \ - -PcompileAndRunTestsWithJava11 \ + -PtestJavaVersion=11 \ -Pjava11Home=$JAVA_HOME_11_X64 \ '-PloadTest.args=${{ env.beam_LoadTests_Java_GBK_Dataflow_V2_Streaming_Java11_test_arguments_3 }}' \ - name: run Load test fanout 4 times with 2GB 10-byte records total @@ -126,7 +126,7 @@ jobs: -PloadTest.mainClass=org.apache.beam.sdk.loadtests.GroupByKeyLoadTest \ -Prunner=:runners:google-cloud-dataflow-java \ -Prunner.version=V2 \ - -PcompileAndRunTestsWithJava11 \ + -PtestJavaVersion=11 \ -Pjava11Home=$JAVA_HOME_11_X64 \ '-PloadTest.args=${{ env.beam_LoadTests_Java_GBK_Dataflow_V2_Streaming_Java11_test_arguments_4 }}' \ - name: run Load test fanout 8 times with 2GB 10-byte records total @@ -137,7 +137,7 @@ jobs: -PloadTest.mainClass=org.apache.beam.sdk.loadtests.GroupByKeyLoadTest \ -Prunner=:runners:google-cloud-dataflow-java \ -Prunner.version=V2 \ - -PcompileAndRunTestsWithJava11 \ + -PtestJavaVersion=11 \ -Pjava11Home=$JAVA_HOME_11_X64 \ '-PloadTest.args=${{ env.beam_LoadTests_Java_GBK_Dataflow_V2_Streaming_Java11_test_arguments_5 }}' \ - name: run Load test reiterate 4 times 10kB values @@ -148,7 +148,7 @@ jobs: -PloadTest.mainClass=org.apache.beam.sdk.loadtests.GroupByKeyLoadTest \ -Prunner=:runners:google-cloud-dataflow-java \ -Prunner.version=V2 \ - -PcompileAndRunTestsWithJava11 \ + -PtestJavaVersion=11 \ -Pjava11Home=$JAVA_HOME_11_X64 \ '-PloadTest.args=${{ env.beam_LoadTests_Java_GBK_Dataflow_V2_Streaming_Java11_test_arguments_6 }}' \ - name: run Load test reiterate 4 times 2MB values @@ -159,6 +159,6 @@ jobs: -PloadTest.mainClass=org.apache.beam.sdk.loadtests.GroupByKeyLoadTest \ -Prunner=:runners:google-cloud-dataflow-java \ -Prunner.version=V2 \ - -PcompileAndRunTestsWithJava11 \ + -PtestJavaVersion=11 \ -Pjava11Home=$JAVA_HOME_11_X64 \ '-PloadTest.args=${{ env.beam_LoadTests_Java_GBK_Dataflow_V2_Streaming_Java11_test_arguments_7 }}' \ No newline at end of file diff --git a/.github/workflows/beam_LoadTests_Java_GBK_Dataflow_V2_Streaming_Java17.yml b/.github/workflows/beam_LoadTests_Java_GBK_Dataflow_V2_Streaming_Java17.yml index 5df7d3249192..cd8c34279593 100644 --- a/.github/workflows/beam_LoadTests_Java_GBK_Dataflow_V2_Streaming_Java17.yml +++ b/.github/workflows/beam_LoadTests_Java_GBK_Dataflow_V2_Streaming_Java17.yml @@ -95,7 +95,7 @@ jobs: -PloadTest.mainClass=org.apache.beam.sdk.loadtests.GroupByKeyLoadTest \ -Prunner=:runners:google-cloud-dataflow-java \ -Prunner.version=V2 \ - -PcompileAndRunTestsWithJava17 \ + -PtestJavaVersion=17 \ -Pjava17Home=$JAVA_HOME_17_X64 \ '-PloadTest.args=${{ env.beam_LoadTests_Java_GBK_Dataflow_V2_Streaming_Java17_test_arguments_1 }}' \ - name: run Load test 2GB of 100B records @@ -106,7 +106,7 @@ jobs: -PloadTest.mainClass=org.apache.beam.sdk.loadtests.GroupByKeyLoadTest \ -Prunner=:runners:google-cloud-dataflow-java \ -Prunner.version=V2 \ - -PcompileAndRunTestsWithJava17 \ + -PtestJavaVersion=17 \ -Pjava17Home=$JAVA_HOME_17_X64 \ 
'-PloadTest.args=${{ env.beam_LoadTests_Java_GBK_Dataflow_V2_Streaming_Java17_test_arguments_2 }}' \ - name: run Load test 2GB of 100kB records @@ -117,7 +117,7 @@ jobs: -PloadTest.mainClass=org.apache.beam.sdk.loadtests.GroupByKeyLoadTest \ -Prunner=:runners:google-cloud-dataflow-java \ -Prunner.version=V2 \ - -PcompileAndRunTestsWithJava17 \ + -PtestJavaVersion=17 \ -Pjava17Home=$JAVA_HOME_17_X64 \ '-PloadTest.args=${{ env.beam_LoadTests_Java_GBK_Dataflow_V2_Streaming_Java17_test_arguments_3 }}' \ - name: run Load test fanout 4 times with 2GB 10-byte records total @@ -128,7 +128,7 @@ jobs: -PloadTest.mainClass=org.apache.beam.sdk.loadtests.GroupByKeyLoadTest \ -Prunner=:runners:google-cloud-dataflow-java \ -Prunner.version=V2 \ - -PcompileAndRunTestsWithJava17 \ + -PtestJavaVersion=17 \ -Pjava17Home=$JAVA_HOME_17_X64 \ '-PloadTest.args=${{ env.beam_LoadTests_Java_GBK_Dataflow_V2_Streaming_Java17_test_arguments_4 }}' \ - name: run Load test fanout 8 times with 2GB 10-byte records total @@ -139,7 +139,7 @@ jobs: -PloadTest.mainClass=org.apache.beam.sdk.loadtests.GroupByKeyLoadTest \ -Prunner=:runners:google-cloud-dataflow-java \ -Prunner.version=V2 \ - -PcompileAndRunTestsWithJava17 \ + -PtestJavaVersion=17 \ -Pjava17Home=$JAVA_HOME_17_X64 \ '-PloadTest.args=${{ env.beam_LoadTests_Java_GBK_Dataflow_V2_Streaming_Java17_test_arguments_5 }}' \ - name: run Load test reiterate 4 times 10kB values @@ -150,7 +150,7 @@ jobs: -PloadTest.mainClass=org.apache.beam.sdk.loadtests.GroupByKeyLoadTest \ -Prunner=:runners:google-cloud-dataflow-java \ -Prunner.version=V2 \ - -PcompileAndRunTestsWithJava17 \ + -PtestJavaVersion=17 \ -Pjava17Home=$JAVA_HOME_17_X64 \ '-PloadTest.args=${{ env.beam_LoadTests_Java_GBK_Dataflow_V2_Streaming_Java17_test_arguments_6 }}' \ - name: run Load test reiterate 4 times 2MB values @@ -161,6 +161,6 @@ jobs: -PloadTest.mainClass=org.apache.beam.sdk.loadtests.GroupByKeyLoadTest \ -Prunner=:runners:google-cloud-dataflow-java \ -Prunner.version=V2 \ - -PcompileAndRunTestsWithJava17 \ + -PtestJavaVersion=17 \ -Pjava17Home=$JAVA_HOME_17_X64 \ '-PloadTest.args=${{ env.beam_LoadTests_Java_GBK_Dataflow_V2_Streaming_Java17_test_arguments_7 }}' \ No newline at end of file diff --git a/.github/workflows/beam_PostCommit_Java_Examples_Dataflow_ARM.yml b/.github/workflows/beam_PostCommit_Java_Examples_Dataflow_ARM.yml index 939d2646d352..8add1a2445c8 100644 --- a/.github/workflows/beam_PostCommit_Java_Examples_Dataflow_ARM.yml +++ b/.github/workflows/beam_PostCommit_Java_Examples_Dataflow_ARM.yml @@ -111,7 +111,7 @@ jobs: gradle-command: :runners:google-cloud-dataflow-java:arm:examplesJavaRunnerV2IntegrationTestARM max-workers: 12 arguments: | - -PcompileAndRunTestsWithJava${{ matrix.java_version }} \ + -PtestJavaVersion=${{ matrix.java_version }} \ -Pjava${{ matrix.java_version }}Home=$JAVA_HOME_${{ matrix.java_version }}_X64 \ -Pcontainer-architecture-list=arm64,amd64 \ -Ppush-containers \ diff --git a/.github/workflows/beam_PostCommit_Java_Examples_Dataflow_V2_Java.yml b/.github/workflows/beam_PostCommit_Java_Examples_Dataflow_V2_Java.yml index 74e1787945e3..ab862df1847a 100644 --- a/.github/workflows/beam_PostCommit_Java_Examples_Dataflow_V2_Java.yml +++ b/.github/workflows/beam_PostCommit_Java_Examples_Dataflow_V2_Java.yml @@ -86,7 +86,7 @@ jobs: -PdisableSpotlessCheck=true \ -PdisableCheckStyle=true \ -PskipCheckerFramework \ - -PcompileAndRunTestsWithJava${{ matrix.java_version }} \ + -PtestJavaVersion=${{ matrix.java_version }} \ -Pjava${{ matrix.java_version }}Home=$JAVA_HOME_${{
matrix.java_version }}_X64 \ - name: Archive JUnit Test Results uses: actions/upload-artifact@v3 diff --git a/.github/workflows/beam_PostCommit_Java_Jpms_Dataflow_Java17.yml b/.github/workflows/beam_PostCommit_Java_Jpms_Dataflow_Java17.yml index 0b9a366211ee..3f6352b0a948 100644 --- a/.github/workflows/beam_PostCommit_Java_Jpms_Dataflow_Java17.yml +++ b/.github/workflows/beam_PostCommit_Java_Jpms_Dataflow_Java17.yml @@ -80,7 +80,7 @@ jobs: gradle-command: :sdks:java:testing:jpms-tests:dataflowRunnerIntegrationTest arguments: -PskipCheckerFramework - -PcompileAndRunTestsWithJava17 + -PtestJavaVersion=17 -Pjava17Home=$JAVA_HOME_17_X64 - name: Archive JUnit Test Results uses: actions/upload-artifact@v3 diff --git a/.github/workflows/beam_PostCommit_Java_Jpms_Direct_Java17.yml b/.github/workflows/beam_PostCommit_Java_Jpms_Direct_Java17.yml index fcdbd63f7e97..360b9c757a02 100644 --- a/.github/workflows/beam_PostCommit_Java_Jpms_Direct_Java17.yml +++ b/.github/workflows/beam_PostCommit_Java_Jpms_Direct_Java17.yml @@ -80,7 +80,7 @@ jobs: gradle-command: :sdks:java:testing:jpms-tests:directRunnerIntegrationTest arguments: -PskipCheckerFramework - -PcompileAndRunTestsWithJava17 + -PtestJavaVersion=17 -Pjava17Home=$JAVA_HOME_17_X64 - name: Archive JUnit Test Results uses: actions/upload-artifact@v3 diff --git a/.github/workflows/beam_PostCommit_Java_Nexmark_Dataflow_V2_Java.yml b/.github/workflows/beam_PostCommit_Java_Nexmark_Dataflow_V2_Java.yml index d78793172898..5782928a8b4d 100644 --- a/.github/workflows/beam_PostCommit_Java_Nexmark_Dataflow_V2_Java.yml +++ b/.github/workflows/beam_PostCommit_Java_Nexmark_Dataflow_V2_Java.yml @@ -107,7 +107,7 @@ jobs: with: gradle-command: :sdks:java:testing:nexmark:run arguments: | - -PcompileAndRunTestsWithJava${{ matrix.java_version }} \ + -PtestJavaVersion=${{ matrix.java_version }} \ -Pjava${{ matrix.java_version }}Home=$JAVA_HOME_${{ matrix.java_version }}_X64 \ -Pnexmark.runner.version=V2 \ -Pnexmark.runner=:runners:google-cloud-dataflow-java \ diff --git a/.github/workflows/beam_PostCommit_TransformService_Direct.yml b/.github/workflows/beam_PostCommit_TransformService_Direct.yml index 491688e097a0..2ba83506d395 100644 --- a/.github/workflows/beam_PostCommit_TransformService_Direct.yml +++ b/.github/workflows/beam_PostCommit_TransformService_Direct.yml @@ -84,7 +84,7 @@ jobs: with: gradle-command: :sdks:python:test-suites:direct:xlang:transformServicePythonUsingJava arguments: | - -PcompileAndRunTestsWithJava11 \ + -PtestJavaVersion=11 \ -Pjava11Home=$JAVA_HOME_11_X64 \ -PuseWheelDistribution \ -PpythonVersion=${{ matrix.python_version }} \ diff --git a/.github/workflows/beam_PreCommit_Java_Examples_Dataflow_Java11.yml b/.github/workflows/beam_PreCommit_Java_Examples_Dataflow_Java11.yml index 03f4d32861b1..186d75674c71 100644 --- a/.github/workflows/beam_PreCommit_Java_Examples_Dataflow_Java11.yml +++ b/.github/workflows/beam_PreCommit_Java_Examples_Dataflow_Java11.yml @@ -115,7 +115,7 @@ jobs: -PdisableSpotlessCheck=true \ -PdisableCheckStyle=true \ -PskipCheckerFramework \ - -PcompileAndRunTestsWithJava11 \ + -PtestJavaVersion=11 \ -Pjava11Home=$JAVA_HOME_11_X64 \ - name: Archive JUnit Test Results uses: actions/upload-artifact@v3 diff --git a/.github/workflows/beam_PreCommit_Java_Examples_Dataflow_Java17.yml b/.github/workflows/beam_PreCommit_Java_Examples_Dataflow_Java17.yml index cf8cefe388f3..cafc8cf5ab6e 100644 --- a/.github/workflows/beam_PreCommit_Java_Examples_Dataflow_Java17.yml +++
b/.github/workflows/beam_PreCommit_Java_Examples_Dataflow_Java17.yml @@ -115,7 +115,7 @@ jobs: arguments: | -PdisableSpotlessCheck=true \ -PdisableCheckStyle=true \ - -PcompileAndRunTestsWithJava17 \ + -PtestJavaVersion=17 \ -PskipCheckerFramework \ -Pjava17Home=$JAVA_HOME_17_X64 \ max-workers: 12 diff --git a/.github/workflows/beam_PreCommit_SQL_Java11.yml b/.github/workflows/beam_PreCommit_SQL_Java11.yml index f1c733418b8d..3412365bf13e 100644 --- a/.github/workflows/beam_PreCommit_SQL_Java11.yml +++ b/.github/workflows/beam_PreCommit_SQL_Java11.yml @@ -103,7 +103,7 @@ jobs: arguments: | -PdisableSpotlessCheck=true \ -PdisableCheckStyle=true \ - -PcompileAndRunTestsWithJava11 \ + -PtestJavaVersion=11 \ -PskipCheckerFramework \ -Pjava11Home=$JAVA_HOME_11_X64 \ - name: Archive JUnit Test Results diff --git a/.github/workflows/beam_PreCommit_SQL_Java17.yml b/.github/workflows/beam_PreCommit_SQL_Java17.yml index 7547bd396815..2215bab8ddeb 100644 --- a/.github/workflows/beam_PreCommit_SQL_Java17.yml +++ b/.github/workflows/beam_PreCommit_SQL_Java17.yml @@ -101,7 +101,7 @@ jobs: arguments: | -PdisableSpotlessCheck=true \ -PdisableCheckStyle=true \ - -PcompileAndRunTestsWithJava17 \ + -PtestJavaVersion=17 \ -PskipCheckerFramework \ -Pjava17Home=$JAVA_HOME_17_X64 \ - name: Archive JUnit Test Results diff --git a/.test-infra/jenkins/JavaTestProperties.groovy b/.test-infra/jenkins/JavaTestProperties.groovy index ce7446a6e71d..5403cee5cf9a 100644 --- a/.test-infra/jenkins/JavaTestProperties.groovy +++ b/.test-infra/jenkins/JavaTestProperties.groovy @@ -17,5 +17,10 @@ */ class JavaTestProperties { - final static List SUPPORTED_CONTAINER_TASKS = ['java8', 'java11', 'java17'] + final static List SUPPORTED_CONTAINER_TASKS = [ + 'java8', + 'java11', + 'java17', + 'java21' + ] } diff --git a/.test-infra/jenkins/NexmarkBuilder.groovy b/.test-infra/jenkins/NexmarkBuilder.groovy index 044b0cbb9561..69fa3dcc4277 100644 --- a/.test-infra/jenkins/NexmarkBuilder.groovy +++ b/.test-infra/jenkins/NexmarkBuilder.groovy @@ -145,7 +145,7 @@ class NexmarkBuilder { rootBuildScriptDir(commonJobProperties.checkoutDir) tasks(':sdks:java:testing:nexmark:run') commonJobProperties.setGradleSwitches(delegate) - switches("-PcompileAndRunTestsWithJava11") + switches("-PtestJavaVersion=11") switches("-Pjava11Home=${commonJobProperties.JAVA_11_HOME}") switches("-Pnexmark.runner=${runner.getDependencyBySDK(sdk)}") switches("-Pnexmark.args=\"${parseOptions(options)}\"") @@ -168,7 +168,7 @@ class NexmarkBuilder { rootBuildScriptDir(commonJobProperties.checkoutDir) tasks(':sdks:java:testing:nexmark:run') commonJobProperties.setGradleSwitches(delegate) - switches("-PcompileAndRunTestsWithJava17") + switches("-PtestJavaVersion=17") switches("-Pjava17Home=${commonJobProperties.JAVA_17_HOME}") switches("-Pnexmark.runner=${runner.getDependencyBySDK(sdk)}") switches("-Pnexmark.args=\"${parseOptions(options)}\"") diff --git a/.test-infra/jenkins/job_LoadTests_CoGBK_Dataflow_V2_Java11.groovy b/.test-infra/jenkins/job_LoadTests_CoGBK_Dataflow_V2_Java11.groovy index fc7f39d28a0d..55501db4429d 100644 --- a/.test-infra/jenkins/job_LoadTests_CoGBK_Dataflow_V2_Java11.groovy +++ b/.test-infra/jenkins/job_LoadTests_CoGBK_Dataflow_V2_Java11.groovy @@ -188,7 +188,7 @@ def loadTestConfigurations = { mode, isStreaming -> def final JOB_SPECIFIC_SWITCHES = [ '-Prunner.version="V2"', - '-PcompileAndRunTestsWithJava11', + '-PtestJavaVersion=11', "-Pjava11Home=${commonJobProperties.JAVA_11_HOME}" ] diff --git 
a/.test-infra/jenkins/job_LoadTests_CoGBK_Dataflow_V2_Java17.groovy b/.test-infra/jenkins/job_LoadTests_CoGBK_Dataflow_V2_Java17.groovy index ca8c6689ad0f..8fb09fd07448 100644 --- a/.test-infra/jenkins/job_LoadTests_CoGBK_Dataflow_V2_Java17.groovy +++ b/.test-infra/jenkins/job_LoadTests_CoGBK_Dataflow_V2_Java17.groovy @@ -188,7 +188,7 @@ def loadTestConfigurations = { mode, isStreaming -> def final JOB_SPECIFIC_SWITCHES = [ '-Prunner.version="V2"', - '-PcompileAndRunTestsWithJava17', + '-PtestJavaVersion=17', "-Pjava17Home=${commonJobProperties.JAVA_17_HOME}" ] diff --git a/.test-infra/jenkins/job_LoadTests_GBK_Dataflow_V2_Java11.groovy b/.test-infra/jenkins/job_LoadTests_GBK_Dataflow_V2_Java11.groovy index cc2d5d2e5554..2191e448fade 100644 --- a/.test-infra/jenkins/job_LoadTests_GBK_Dataflow_V2_Java11.groovy +++ b/.test-infra/jenkins/job_LoadTests_GBK_Dataflow_V2_Java11.groovy @@ -253,7 +253,7 @@ def loadTestConfigurations = { mode, isStreaming -> def final JOB_SPECIFIC_SWITCHES = [ '-Prunner.version="V2"', - '-PcompileAndRunTestsWithJava11', + '-PtestJavaVersion=11', "-Pjava11Home=${commonJobProperties.JAVA_11_HOME}" ] diff --git a/.test-infra/jenkins/job_LoadTests_GBK_Dataflow_V2_Java17.groovy b/.test-infra/jenkins/job_LoadTests_GBK_Dataflow_V2_Java17.groovy index 7405f9154b83..2520f68f0178 100644 --- a/.test-infra/jenkins/job_LoadTests_GBK_Dataflow_V2_Java17.groovy +++ b/.test-infra/jenkins/job_LoadTests_GBK_Dataflow_V2_Java17.groovy @@ -253,7 +253,7 @@ def loadTestConfigurations = { mode, isStreaming -> def final JOB_SPECIFIC_SWITCHES = [ '-Prunner.version="V2"', - '-PcompileAndRunTestsWithJava17', + '-PtestJavaVersion=17', "-Pjava17Home=${commonJobProperties.JAVA_17_HOME}" ] diff --git a/.test-infra/jenkins/job_LoadTests_ParDo_Dataflow_V2_Java11.groovy b/.test-infra/jenkins/job_LoadTests_ParDo_Dataflow_V2_Java11.groovy index a4535d52e6cf..b7154e840e6a 100644 --- a/.test-infra/jenkins/job_LoadTests_ParDo_Dataflow_V2_Java11.groovy +++ b/.test-infra/jenkins/job_LoadTests_ParDo_Dataflow_V2_Java11.groovy @@ -160,7 +160,7 @@ def commonLoadTestConfig = { jobType, isStreaming -> def final JOB_SPECIFIC_SWITCHES = [ '-Prunner.version="V2"', - '-PcompileAndRunTestsWithJava11', + '-PtestJavaVersion=11', "-Pjava11Home=${commonJobProperties.JAVA_11_HOME}" ] diff --git a/.test-infra/jenkins/job_LoadTests_ParDo_Dataflow_V2_Java17.groovy b/.test-infra/jenkins/job_LoadTests_ParDo_Dataflow_V2_Java17.groovy index f0f2179ebb3b..df6c66e02aed 100644 --- a/.test-infra/jenkins/job_LoadTests_ParDo_Dataflow_V2_Java17.groovy +++ b/.test-infra/jenkins/job_LoadTests_ParDo_Dataflow_V2_Java17.groovy @@ -160,7 +160,7 @@ def commonLoadTestConfig = { jobType, isStreaming -> def final JOB_SPECIFIC_SWITCHES = [ '-Prunner.version="V2"', - '-PcompileAndRunTestsWithJava17', + '-PtestJavaVersion=17', "-Pjava17Home=${commonJobProperties.JAVA_17_HOME}" ] diff --git a/.test-infra/jenkins/job_PostCommit_Java_Examples_Dataflow_V2_Java11.groovy b/.test-infra/jenkins/job_PostCommit_Java_Examples_Dataflow_V2_Java11.groovy index 6687ae0e6f8a..6229f7c48a72 100644 --- a/.test-infra/jenkins/job_PostCommit_Java_Examples_Dataflow_V2_Java11.groovy +++ b/.test-infra/jenkins/job_PostCommit_Java_Examples_Dataflow_V2_Java11.groovy @@ -43,7 +43,7 @@ PostcommitJobBuilder.postCommitJob('beam_PostCommit_Java_Examples_Dataflow_V2_ja commonJobProperties.setGradleSwitches(delegate, 3 * Runtime.runtime.availableProcessors()) switches '-PdisableSpotlessCheck=true' switches '-PdisableCheckStyle=true' - switches '-PcompileAndRunTestsWithJava11' + 
switches '-PtestJavaVersion=11' switches '-PskipCheckerFramework' switches "-Pjava11Home=${commonJobProperties.JAVA_11_HOME}" } diff --git a/.test-infra/jenkins/job_PostCommit_Java_Examples_Dataflow_V2_Java17.groovy b/.test-infra/jenkins/job_PostCommit_Java_Examples_Dataflow_V2_Java17.groovy index b275fe9276d9..7e52a7e09789 100644 --- a/.test-infra/jenkins/job_PostCommit_Java_Examples_Dataflow_V2_Java17.groovy +++ b/.test-infra/jenkins/job_PostCommit_Java_Examples_Dataflow_V2_Java17.groovy @@ -43,7 +43,7 @@ PostcommitJobBuilder.postCommitJob('beam_PostCommit_Java_Examples_Dataflow_V2_ja commonJobProperties.setGradleSwitches(delegate, 3 * Runtime.runtime.availableProcessors()) switches '-PdisableSpotlessCheck=true' switches '-PdisableCheckStyle=true' - switches '-PcompileAndRunTestsWithJava17' + switches '-PtestJavaVersion=17' switches '-PskipCheckerFramework' switches "-Pjava17Home=${commonJobProperties.JAVA_17_HOME}" } diff --git a/.test-infra/jenkins/job_PostCommit_Java_Jpms_Dataflow_Java17.groovy b/.test-infra/jenkins/job_PostCommit_Java_Jpms_Dataflow_Java17.groovy index 4e26c164319e..f518985ca7a8 100644 --- a/.test-infra/jenkins/job_PostCommit_Java_Jpms_Dataflow_Java17.groovy +++ b/.test-infra/jenkins/job_PostCommit_Java_Jpms_Dataflow_Java17.groovy @@ -42,7 +42,7 @@ PostcommitJobBuilder.postCommitJob('beam_PostCommit_Java_Jpms_Dataflow_Java17', tasks(':sdks:java:testing:jpms-tests:dataflowRunnerIntegrationTest') commonJobProperties.setGradleSwitches(delegate) switches("-PskipCheckerFramework") - switches("-PcompileAndRunTestsWithJava17") + switches("-PtestJavaVersion=17") switches("-Pjava17Home=${commonJobProperties.JAVA_17_HOME}") // Specify maven home on Jenkins, needed by Maven archetype integration tests. switches('-Pmaven_home=/home/jenkins/tools/maven/apache-maven-3.5.4') diff --git a/.test-infra/jenkins/job_PostCommit_Java_Jpms_Direct_Java17.groovy b/.test-infra/jenkins/job_PostCommit_Java_Jpms_Direct_Java17.groovy index f31373ecaada..04c31389ecbe 100644 --- a/.test-infra/jenkins/job_PostCommit_Java_Jpms_Direct_Java17.groovy +++ b/.test-infra/jenkins/job_PostCommit_Java_Jpms_Direct_Java17.groovy @@ -42,7 +42,7 @@ PostcommitJobBuilder.postCommitJob('beam_PostCommit_Java_Jpms_Direct_Java17', 'R tasks(':sdks:java:testing:jpms-tests:directRunnerIntegrationTest') commonJobProperties.setGradleSwitches(delegate) switches("-PskipCheckerFramework") - switches("-PcompileAndRunTestsWithJava17") + switches("-PtestJavaVersion=17") switches("-Pjava17Home=${commonJobProperties.JAVA_17_HOME}") // Specify maven home on Jenkins, needed by Maven archetype integration tests. 
switches('-Pmaven_home=/home/jenkins/tools/maven/apache-maven-3.5.4') diff --git a/.test-infra/jenkins/job_PostCommit_TransformService_Direct.groovy b/.test-infra/jenkins/job_PostCommit_TransformService_Direct.groovy index 0d7f58e71706..03d29069a52c 100644 --- a/.test-infra/jenkins/job_PostCommit_TransformService_Direct.groovy +++ b/.test-infra/jenkins/job_PostCommit_TransformService_Direct.groovy @@ -43,7 +43,7 @@ PostcommitJobBuilder.postCommitJob('beam_PostCommit_TransformService_Direct', rootBuildScriptDir(commonJobProperties.checkoutDir) tasks(':sdks:python:test-suites:direct:xlang:transformServicePythonUsingJava') commonJobProperties.setGradleSwitches(delegate) - switches '-PcompileAndRunTestsWithJava11' + switches '-PtestJavaVersion=11' switches "-Pjava11Home=${commonJobProperties.JAVA_11_HOME}" switches("-PuseWheelDistribution") switches("-PpythonVersion=${pythonVersion}") diff --git a/.test-infra/jenkins/job_PreCommit_Java_Examples_Dataflow_Java11.groovy b/.test-infra/jenkins/job_PreCommit_Java_Examples_Dataflow_Java11.groovy index cabf5dbdd457..9f733a42ccce 100644 --- a/.test-infra/jenkins/job_PreCommit_Java_Examples_Dataflow_Java11.groovy +++ b/.test-infra/jenkins/job_PreCommit_Java_Examples_Dataflow_Java11.groovy @@ -50,7 +50,7 @@ builder.build { switches '-PdisableSpotlessCheck=true' switches '-PdisableCheckStyle=true' switches '-PskipCheckerFramework' // Gradle itself is running under JDK8 so plugin configures wrong for JDK11 - switches '-PcompileAndRunTestsWithJava11' + switches '-PtestJavaVersion=11' switches "-Pjava11Home=${properties.JAVA_11_HOME}" properties.setGradleSwitches(delegate, 3 * Runtime.runtime.availableProcessors()) } diff --git a/.test-infra/jenkins/job_PreCommit_SQL_Java11.groovy b/.test-infra/jenkins/job_PreCommit_SQL_Java11.groovy index 9742ab756cf7..34154b37209e 100644 --- a/.test-infra/jenkins/job_PreCommit_SQL_Java11.groovy +++ b/.test-infra/jenkins/job_PreCommit_SQL_Java11.groovy @@ -26,7 +26,7 @@ PrecommitJobBuilder builder = new PrecommitJobBuilder( gradleSwitches: [ '-PdisableSpotlessCheck=true', '-PdisableCheckStyle=true', - '-PcompileAndRunTestsWithJava11', + '-PtestJavaVersion=11', '-PskipCheckerFramework', // Gradle itself is running under JDK8 so plugin configures wrong for JDK11 "-Pjava11Home=${properties.JAVA_11_HOME}" diff --git a/.test-infra/jenkins/job_PreCommit_SQL_Java17.groovy b/.test-infra/jenkins/job_PreCommit_SQL_Java17.groovy index 158fa683c1a8..472521cb1b08 100644 --- a/.test-infra/jenkins/job_PreCommit_SQL_Java17.groovy +++ b/.test-infra/jenkins/job_PreCommit_SQL_Java17.groovy @@ -26,7 +26,7 @@ PrecommitJobBuilder builder = new PrecommitJobBuilder( gradleSwitches: [ '-PdisableSpotlessCheck=true', '-PdisableCheckStyle=true', - '-PcompileAndRunTestsWithJava17', + '-PtestJavaVersion=17', '-PskipCheckerFramework', // Gradle itself is running under JDK8 so plugin configures wrong for JDK17 "-Pjava17Home=${properties.JAVA_17_HOME}" diff --git a/.test-infra/jenkins/job_Precommit_Java_Examples_Dataflow_Java17.groovy b/.test-infra/jenkins/job_Precommit_Java_Examples_Dataflow_Java17.groovy index 3654a4c75edb..387b707123e7 100644 --- a/.test-infra/jenkins/job_Precommit_Java_Examples_Dataflow_Java17.groovy +++ b/.test-infra/jenkins/job_Precommit_Java_Examples_Dataflow_Java17.groovy @@ -50,7 +50,7 @@ builder.build { switches '-PdisableSpotlessCheck=true' switches '-PdisableCheckStyle=true' switches '-PskipCheckerFramework' // Gradle itself is running under JDK8 so plugin configures wrong for JDK17 - switches '-PcompileAndRunTestsWithJava17' + 
switches '-PtestJavaVersion=17' switches "-Pjava17Home=${properties.JAVA_17_HOME}" properties.setGradleSwitches(delegate, 3 * Runtime.runtime.availableProcessors()) } diff --git a/build.gradle.kts b/build.gradle.kts index ea1b4e6784e3..5bcfbb3ed06e 100644 --- a/build.gradle.kts +++ b/build.gradle.kts @@ -712,14 +712,12 @@ if (project.hasProperty("javaLinkageArtifactIds")) { } } } -if (project.hasProperty("compileAndRunTestsWithJava11")) { - tasks.getByName("javaPreCommitPortabilityApi").dependsOn(":sdks:java:testing:test-utils:verifyJavaVersion") - tasks.getByName("javaExamplesDataflowPrecommit").dependsOn(":sdks:java:testing:test-utils:verifyJavaVersion") - tasks.getByName("sqlPreCommit").dependsOn(":sdks:java:testing:test-utils:verifyJavaVersion") -} else if (project.hasProperty("compileAndRunTestsWithJava17")) { - tasks.getByName("javaPreCommitPortabilityApi").dependsOn(":sdks:java:testing:test-utils:verifyJavaVersion17") - tasks.getByName("javaExamplesDataflowPrecommit").dependsOn(":sdks:java:testing:test-utils:verifyJavaVersion17") - tasks.getByName("sqlPreCommit").dependsOn(":sdks:java:testing:test-utils:verifyJavaVersion17") +if (project.hasProperty("testJavaVersion")) { + var testVer = project.property("testJavaVersion") + + tasks.getByName("javaPreCommitPortabilityApi").dependsOn(":sdks:java:testing:test-utils:verifyJavaVersion$testVer") + tasks.getByName("javaExamplesDataflowPrecommit").dependsOn(":sdks:java:testing:test-utils:verifyJavaVersion$testVer") + tasks.getByName("sqlPreCommit").dependsOn(":sdks:java:testing:test-utils:verifyJavaVersion$testVer") } else { allprojects { tasks.withType(Test::class).configureEach { diff --git a/buildSrc/src/main/groovy/org/apache/beam/gradle/BeamModulePlugin.groovy b/buildSrc/src/main/groovy/org/apache/beam/gradle/BeamModulePlugin.groovy index 19ed6aaa4276..c32717aae725 100644 --- a/buildSrc/src/main/groovy/org/apache/beam/gradle/BeamModulePlugin.groovy +++ b/buildSrc/src/main/groovy/org/apache/beam/gradle/BeamModulePlugin.groovy @@ -946,6 +946,29 @@ class BeamModulePlugin implements Plugin<Project> { ] } + project.ext.setJava21Options = { CompileOptions options -> + def java21Home = project.findProperty("java21Home") + options.fork = true + options.forkOptions.javaHome = java21Home as File + options.compilerArgs += ['-Xlint:-path'] + // Error prone requires some packages to be exported/opened for Java 17+ + // Disabling checks since this property is only used for Jenkins tests + // https://github.com/tbroyer/gradle-errorprone-plugin#jdk-16-support + options.errorprone.errorproneArgs.add("-XepDisableAllChecks") + options.forkOptions.jvmArgs += [ + "-J--add-exports=jdk.compiler/com.sun.tools.javac.api=ALL-UNNAMED", + "-J--add-exports=jdk.compiler/com.sun.tools.javac.file=ALL-UNNAMED", + "-J--add-exports=jdk.compiler/com.sun.tools.javac.main=ALL-UNNAMED", + "-J--add-exports=jdk.compiler/com.sun.tools.javac.model=ALL-UNNAMED", + "-J--add-exports=jdk.compiler/com.sun.tools.javac.parser=ALL-UNNAMED", + "-J--add-exports=jdk.compiler/com.sun.tools.javac.processing=ALL-UNNAMED", + "-J--add-exports=jdk.compiler/com.sun.tools.javac.tree=ALL-UNNAMED", + "-J--add-exports=jdk.compiler/com.sun.tools.javac.util=ALL-UNNAMED", + "-J--add-opens=jdk.compiler/com.sun.tools.javac.code=ALL-UNNAMED", + "-J--add-opens=jdk.compiler/com.sun.tools.javac.comp=ALL-UNNAMED" + ] + } + project.ext.repositories = { maven { name "testPublicationLocal" @@ -1492,7 +1515,7 @@ class BeamModulePlugin implements Plugin<Project> {
options.errorprone.errorproneArgs.add("-Xep:Slf4jLoggerShouldBeNonStatic:OFF") } - if (project.hasProperty("compileAndRunTestsWithJava11")) { + if (project.findProperty('testJavaVersion') == "11") { def java11Home = project.findProperty("java11Home") project.tasks.compileTestJava { options.fork = true @@ -1504,7 +1527,7 @@ class BeamModulePlugin implements Plugin<Project> { useJUnit() executable = "${java11Home}/bin/java" } - } else if (project.hasProperty("compileAndRunTestsWithJava17")) { + } else if (project.findProperty('testJavaVersion') == "17") { def java17Home = project.findProperty("java17Home") project.tasks.compileTestJava { setCompileAndRuntimeJavaVersion(options.compilerArgs, '17') @@ -1514,6 +1537,16 @@ class BeamModulePlugin implements Plugin<Project> { useJUnit() executable = "${java17Home}/bin/java" } + } else if (project.findProperty('testJavaVersion') == "21") { + def java21Home = project.findProperty("java21Home") + project.tasks.compileTestJava { + setCompileAndRuntimeJavaVersion(options.compilerArgs, '21') + project.ext.setJava21Options(options) + } + project.tasks.withType(Test).configureEach { + useJUnit() + executable = "${java21Home}/bin/java" + } } if (configuration.shadowClosure) { diff --git a/runners/core-construction-java/src/main/java/org/apache/beam/runners/core/construction/Environments.java b/runners/core-construction-java/src/main/java/org/apache/beam/runners/core/construction/Environments.java index 31a555989afd..f531b5be344d 100644 --- a/runners/core-construction-java/src/main/java/org/apache/beam/runners/core/construction/Environments.java +++ b/runners/core-construction-java/src/main/java/org/apache/beam/runners/core/construction/Environments.java @@ -94,7 +94,8 @@ public class Environments { public enum JavaVersion { java8("java", "1.8", 8), java11("java11", "11", 11), - java17("java17", "17", 17); + java17("java17", "17", 17), + java21("java21", "21", 21); // Legacy name, as used in container image private final String legacyName; @@ -119,6 +120,7 @@ public String specification() { return this.specification; } + /** Return the LTS Java version given the Java specification version.
*/ public static JavaVersion forSpecification(String specification) { for (JavaVersion ver : JavaVersion.values()) { if (ver.specification.equals(specification)) { @@ -137,7 +139,7 @@ public static JavaVersion forSpecification(String specification) { } } LOG.warn( - "unsupported Java version: {}, falling back to: {}", + "Unsupported Java version: {}, falling back to: {}", specification, fallback.specification); return fallback; diff --git a/runners/core-construction-java/src/test/java/org/apache/beam/runners/core/construction/EnvironmentsTest.java b/runners/core-construction-java/src/test/java/org/apache/beam/runners/core/construction/EnvironmentsTest.java index ae429fb1fe6d..b71a654f1031 100644 --- a/runners/core-construction-java/src/test/java/org/apache/beam/runners/core/construction/EnvironmentsTest.java +++ b/runners/core-construction-java/src/test/java/org/apache/beam/runners/core/construction/EnvironmentsTest.java @@ -291,6 +291,8 @@ public void testLtsJavaVersion() { assertEquals("java11", JavaVersion.java11.legacyName()); assertEquals(JavaVersion.java17, JavaVersion.forSpecification("17")); assertEquals("java17", JavaVersion.java17.legacyName()); + assertEquals(JavaVersion.java21, JavaVersion.forSpecification("21")); + assertEquals("java21", JavaVersion.java21.legacyName()); } @Test @@ -303,7 +305,9 @@ public void testNonLtsJavaVersion() { assertEquals(JavaVersion.java17, JavaVersion.forSpecification("15")); assertEquals(JavaVersion.java17, JavaVersion.forSpecification("16")); assertEquals(JavaVersion.java17, JavaVersion.forSpecification("18")); - assertEquals(JavaVersion.java17, JavaVersion.forSpecification("19")); + assertEquals(JavaVersion.java21, JavaVersion.forSpecification("19")); + assertEquals(JavaVersion.java21, JavaVersion.forSpecification("20")); + assertEquals(JavaVersion.java21, JavaVersion.forSpecification("21")); } @Test(expected = UnsupportedOperationException.class) diff --git a/runners/google-cloud-dataflow-java/arm/build.gradle b/runners/google-cloud-dataflow-java/arm/build.gradle index e79eeedcd828..71cbc7c58e86 100644 --- a/runners/google-cloud-dataflow-java/arm/build.gradle +++ b/runners/google-cloud-dataflow-java/arm/build.gradle @@ -76,10 +76,8 @@ dependencies { } def javaVer = "java8" -if(project.hasProperty('compileAndRunTestsWithJava17')) { - javaVer = "java17" -} else if(project.hasProperty('compileAndRunTestsWithJava11')) { - javaVer = "java11" +if (project.hasProperty('testJavaVersion')) { + javaVer = "java${project.getProperty('testJavaVersion')}" } def dataflowProject = project.findProperty('dataflowProject') ?: 'apache-beam-testing' def dataflowRegion = project.findProperty('dataflowRegion') ?: 'us-central1' diff --git a/runners/google-cloud-dataflow-java/build.gradle b/runners/google-cloud-dataflow-java/build.gradle index adc1f2e09bc4..e4f34687d31c 100644 --- a/runners/google-cloud-dataflow-java/build.gradle +++ b/runners/google-cloud-dataflow-java/build.gradle @@ -279,10 +279,8 @@ def createRunnerV2ValidatesRunnerTest = { Map args -> // task ordering such that the registry doesn't get cleaned up prior to task completion. 
def buildAndPushDockerJavaContainer = tasks.register("buildAndPushDockerJavaContainer") { def javaVer = "java8" - if(project.hasProperty('compileAndRunTestsWithJava17')) { - javaVer = "java17" - } else if(project.hasProperty('compileAndRunTestsWithJava11')) { - javaVer = "java11" + if(project.hasProperty('testJavaVersion')) { + javaVer = "java${project.getProperty('testJavaVersion')}" } dependsOn ":sdks:java:container:${javaVer}:docker" def defaultDockerImageName = containerImageName( diff --git a/runners/google-cloud-dataflow-java/src/main/java/org/apache/beam/runners/dataflow/DataflowRunner.java b/runners/google-cloud-dataflow-java/src/main/java/org/apache/beam/runners/dataflow/DataflowRunner.java index 26548038a1df..891b4c0454c9 100644 --- a/runners/google-cloud-dataflow-java/src/main/java/org/apache/beam/runners/dataflow/DataflowRunner.java +++ b/runners/google-cloud-dataflow-java/src/main/java/org/apache/beam/runners/dataflow/DataflowRunner.java @@ -398,10 +398,9 @@ && isServiceEndpoint(dataflowOptions.getDataflowEndpoint())) { // Adding the Java version to the SDK name for user's and support convenience. String agentJavaVer = "(JRE 8 environment)"; - if (Environments.getJavaVersion() == Environments.JavaVersion.java17) { - agentJavaVer = "(JRE 17 environment)"; - } else if (Environments.getJavaVersion() == Environments.JavaVersion.java11) { - agentJavaVer = "(JRE 11 environment)"; + if (Environments.getJavaVersion() != Environments.JavaVersion.java8) { + agentJavaVer = + String.format("(JRE %s environment)", Environments.getJavaVersion().specification()); } DataflowRunnerInfo dataflowRunnerInfo = DataflowRunnerInfo.getDataflowRunnerInfo(); diff --git a/runners/spark/spark_runner.gradle b/runners/spark/spark_runner.gradle index d0dbe453ddfb..74013de6107d 100644 --- a/runners/spark/spark_runner.gradle +++ b/runners/spark/spark_runner.gradle @@ -63,8 +63,9 @@ def sparkTestProperties(overrides = [:]) { def sparkTestJvmArgs() { - // run tests with Java 17 using -PcompileAndRunTestsWithJava17 -Pjava17Home=??? - if (project.hasProperty("compileAndRunTestsWithJava17")) { + // run tests with Java 17 using -PtestJavaVersion=17 -Pjava17Home=??? + if (project.hasProperty('testJavaVersion') && + project.getProperty('testJavaVersion') in ['17', '21']) { return [ "--add-opens=java.base/sun.nio.ch=ALL-UNNAMED", // add-opens below required for Kryo FieldSerializer / SparkRunnerKryoRegistratorTest diff --git a/sdks/java/container/Dockerfile b/sdks/java/container/Dockerfile index e0fa8d4a0a6f..9c266ea132b8 100644 --- a/sdks/java/container/Dockerfile +++ b/sdks/java/container/Dockerfile @@ -15,8 +15,9 @@ # See the License for the specific language governing permissions and # limitations under the License. 
############################################################################### +ARG base_image ARG java_version -FROM eclipse-temurin:${java_version} +FROM ${base_image}:${java_version} LABEL Author "Apache Beam <dev@beam.apache.org>" ARG TARGETOS ARG TARGETARCH diff --git a/sdks/java/container/common.gradle b/sdks/java/container/common.gradle index cc427494ed6e..9f6cbe64a294 100644 --- a/sdks/java/container/common.gradle +++ b/sdks/java/container/common.gradle @@ -29,6 +29,7 @@ applyDockerNature() if (!project.hasProperty('imageJavaVersion')) { throw new GradleException('imageJavaVersion project property must be set') } +def javaBaseImage = project.findProperty('javaBaseImage') ?: 'eclipse-temurin' def imageJavaVersion = project.findProperty('imageJavaVersion') description = "Apache Beam :: SDKs :: Java :: Container :: Java ${imageJavaVersion} Container" @@ -71,19 +72,19 @@ task copySdkHarnessLauncher(type: Copy) { } task copyJavaThirdPartyLicenses(type: Copy) { - from("${project(':sdks:java:container').buildDir}/target/third_party_licenses") + from project(':sdks:java:container').layout.buildDirectory.dir('target/third_party_licenses') into "build/target/third_party_licenses" dependsOn ':sdks:java:container:pullLicenses' } task copyGolangLicenses(type: Copy) { - from "${project(':release:go-licenses:java').buildDir}/output" + from project(':release:go-licenses:java').layout.buildDirectory.dir('output') into "build/target/go-licenses" dependsOn ':release:go-licenses:java:createLicenses' } task copyJdkOptions(type: Copy) { - if (imageJavaVersion == "17" || imageJavaVersion == "11") { + if (["11", "17", "21"].contains(imageJavaVersion)) { from "option-jamm.json" } from "java${imageJavaVersion}-security.properties" @@ -97,10 +98,10 @@ task skipPullLicenses(type: Exec) { } task validateJavaHome { - if (imageJavaVersion == "11" || imageJavaVersion == "17") { + if (["11", "17", "21"].contains(imageJavaVersion)) { doFirst { - if (!project.hasProperty('java17Home') && !project.hasProperty('java11Home')) { - throw new GradleException('java17Home or java11Home property required. Re-run with -Pjava17Home or -Pjava11Home') + if (!project.hasProperty("java${imageJavaVersion}Home")) { + throw new GradleException("java${imageJavaVersion}Home property required for imageJavaVersion=${imageJavaVersion}. Re-run with -Pjava${imageJavaVersion}Home") } } } @@ -124,6 +125,7 @@ docker { buildArgs([ 'pull_licenses': project.rootProject.hasProperty(["docker-pull-licenses"]) || project.rootProject.hasProperty(["isRelease"]), + 'base_image': javaBaseImage, 'java_version': imageJavaVersion, ]) buildx useBuildx diff --git a/sdks/java/container/java21/build.gradle b/sdks/java/container/java21/build.gradle new file mode 100644 index 000000000000..038064102dcb --- /dev/null +++ b/sdks/java/container/java21/build.gradle @@ -0,0 +1,30 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and + * limitations under the License. + */ + +project.ext { + // TODO(https://github.com/apache/beam/issues/28120) switch to temurin once available + javaBaseImage = 'openjdk' + imageJavaVersion = '21' +} + +// Load the main build script which contains all build logic. +apply from: "../common.gradle" + +dependencies { + dockerDependency project(path: ":sdks:java:container:agent") +} \ No newline at end of file diff --git a/sdks/java/container/java21/option-jamm.json b/sdks/java/container/java21/option-jamm.json new file mode 100644 index 000000000000..5647ff66be5c --- /dev/null +++ b/sdks/java/container/java21/option-jamm.json @@ -0,0 +1,12 @@ +{ + "name": "jamm", + "enabled": true, + "options": { + "java_arguments": [ + "--add-modules=jamm", + "--module-path=/opt/apache/beam/jars/jamm.jar", + "--add-opens=java.base/java.lang=jamm", + "--add-opens=java.base/java.util=jamm" + ] + } +} \ No newline at end of file diff --git a/sdks/java/testing/jpms-tests/build.gradle b/sdks/java/testing/jpms-tests/build.gradle index 6321f874c903..2a25463931c1 100644 --- a/sdks/java/testing/jpms-tests/build.gradle +++ b/sdks/java/testing/jpms-tests/build.gradle @@ -23,10 +23,8 @@ plugins { } // overwrite javaVersion before applyJavaNature -if (project.hasProperty("compileAndRunTestsWithJava17")) { - javaVersion = '1.17' -} else { - javaVersion = '1.11' +if (project.hasProperty("testJavaVersion")) { + javaVersion = "1.${project.getProperty('testJavaVersion')}" as String } applyJavaNature( @@ -42,13 +40,14 @@ ext.summary = "E2E test for Java 9 modules" // direct compileJava to use specified java version. project.tasks.compileJava { - if (project.hasProperty("compileAndRunTestsWithJava11")) { + if (project.hasProperty('testJavaVersion')) { options.fork = true - options.forkOptions.javaHome = project.findProperty("java11Home") as File - } else if (project.hasProperty("compileAndRunTestsWithJava17")) { - options.fork = true - options.forkOptions.javaHome = project.findProperty("java17Home") as File - setJava17Options(options) + options.forkOptions.javaHome = project.findProperty("java${project.getProperty('testJavaVersion')}Home") as File + if (project.getProperty('testJavaVersion') == '17') { + setJava17Options(options) + } else if (project.getProperty('testJavaVersion') == '21') { + setJava21Options(options) + } } } @@ -120,7 +119,7 @@ plugins.withType(JavaPlugin).configureEach{ // JPMS requires JDK > 8 project.tasks.each { it.onlyIf { - project.hasProperty("compileAndRunTestsWithJava17") + project.hasProperty('testJavaVersion') || JavaVersion.VERSION_1_8.compareTo(JavaVersion.current()) < 0 } } diff --git a/sdks/java/testing/test-utils/build.gradle b/sdks/java/testing/test-utils/build.gradle index 50c815dd57f7..6e30693d8894 100644 --- a/sdks/java/testing/test-utils/build.gradle +++ b/sdks/java/testing/test-utils/build.gradle @@ -43,24 +43,15 @@ dependencies { testRuntimeOnly project(path: ":runners:direct-java", configuration: "shadowTest") } -task verifyJavaVersion(type: Test) { - filter { - includeTestsMatching 'org.apache.beam.sdk.testutils.jvmverification.JvmVerification.verifyCodeIsCompiledWithJava8' - includeTestsMatching 'org.apache.beam.sdk.testutils.jvmverification.JvmVerification.verifyTestCodeIsCompiledWithJava11' - includeTestsMatching 'org.apache.beam.sdk.testutils.jvmverification.JvmVerification.verifyRunningJVMVersionIs11' - } - doLast { - println 'Java verified' +['11', '17', '21'].each { + tasks.create(name: 
"verifyJavaVersion${it}", type: Test) { + filter { + includeTestsMatching "org.apache.beam.sdk.testutils.jvmverification.JvmVerification.verifyCodeIsCompiledWithJava8" + includeTestsMatching "org.apache.beam.sdk.testutils.jvmverification.JvmVerification.verifyTestCodeIsCompiledWithJava${it}" + includeTestsMatching "org.apache.beam.sdk.testutils.jvmverification.JvmVerification.verifyRunningJVMVersionIs${it}" + } + doLast { + println 'Java verified' + } } } - -task verifyJavaVersion17(type: Test) { - filter { - includeTestsMatching 'org.apache.beam.sdk.testutils.jvmverification.JvmVerification.verifyCodeIsCompiledWithJava8' - includeTestsMatching 'org.apache.beam.sdk.testutils.jvmverification.JvmVerification.verifyTestCodeIsCompiledWithJava17' - includeTestsMatching 'org.apache.beam.sdk.testutils.jvmverification.JvmVerification.verifyRunningJVMVersionIs17' - } - doLast { - println 'Java verified' - } -} \ No newline at end of file diff --git a/sdks/java/testing/test-utils/src/test/java/org/apache/beam/sdk/testutils/jvmverification/JvmVerification.java b/sdks/java/testing/test-utils/src/test/java/org/apache/beam/sdk/testutils/jvmverification/JvmVerification.java index ad29e8b6a1d6..a6b5d6dca6c1 100644 --- a/sdks/java/testing/test-utils/src/test/java/org/apache/beam/sdk/testutils/jvmverification/JvmVerification.java +++ b/sdks/java/testing/test-utils/src/test/java/org/apache/beam/sdk/testutils/jvmverification/JvmVerification.java @@ -20,6 +20,7 @@ import static org.apache.beam.sdk.testutils.jvmverification.JvmVerification.Java.v11; import static org.apache.beam.sdk.testutils.jvmverification.JvmVerification.Java.v17; import static org.apache.beam.sdk.testutils.jvmverification.JvmVerification.Java.v1_8; +import static org.apache.beam.sdk.testutils.jvmverification.JvmVerification.Java.v21; import static org.junit.Assert.assertEquals; import java.io.IOException; @@ -39,6 +40,7 @@ public class JvmVerification { versionMapping.put("0034", v1_8); versionMapping.put("0037", v11); versionMapping.put("003d", v17); + versionMapping.put("0041", v21); } // bytecode @@ -62,6 +64,11 @@ public void verifyTestCodeIsCompiledWithJava17() throws IOException { assertEquals(v17, getByteCodeVersion(JvmVerification.class)); } + @Test + public void verifyTestCodeIsCompiledWithJava21() throws IOException { + assertEquals(v21, getByteCodeVersion(JvmVerification.class)); + } + // jvm @Test public void verifyRunningJVMVersionIs11() { @@ -75,6 +82,12 @@ public void verifyRunningJVMVersionIs17() { assertEquals(v17.name, version); } + @Test + public void verifyRunningJVMVersionIs21() { + final String version = getJavaSpecification(); + assertEquals(v21.name, version); + } + private static Java getByteCodeVersion(final Class clazz) throws IOException { final InputStream stream = clazz.getClassLoader().getResourceAsStream(clazz.getName().replace(".", "/") + ".class"); @@ -91,7 +104,8 @@ private static String getJavaSpecification() { enum Java { v1_8("1.8"), v11("11"), - v17("17"); + v17("17"), + v21("21"); final String name; diff --git a/settings.gradle.kts b/settings.gradle.kts index c370c5da27d1..d1069ec2d352 100644 --- a/settings.gradle.kts +++ b/settings.gradle.kts @@ -175,6 +175,7 @@ include(":sdks:java:container:agent") include(":sdks:java:container:java8") include(":sdks:java:container:java11") include(":sdks:java:container:java17") +include(":sdks:java:container:java21") include(":sdks:java:core") include(":sdks:java:core:jmh") include(":sdks:java:expansion-service") diff --git 
a/website/www/site/content/en/roadmap/java-sdk.md b/website/www/site/content/en/roadmap/java-sdk.md index b65424b57a3d..a1c85e139193 100644 --- a/website/www/site/content/en/roadmap/java-sdk.md +++ b/website/www/site/content/en/roadmap/java-sdk.md @@ -17,9 +17,9 @@ limitations under the License. # Java SDK Roadmap -## Next Java LTS version support (Java 17) +## Next Java LTS version support (Java 21) Work to support the next LTS release of Java is in progress. For more details -about the scope and info on the various tasks please see the JIRA ticket. +about the scope and info on the various tasks please see the GitHub Issue. -- JIRA: [BEAM-12240](https://issues.apache.org/jira/browse/BEAM-12240) +- GitHub: [#28120](https://github.com/apache/beam/issues/28120) From 9c75db4760ca3440a0a2ed12f031d84bc011aa96 Mon Sep 17 00:00:00 2001 From: Robert Bradshaw Date: Wed, 11 Oct 2023 18:19:29 -0700 Subject: [PATCH 02/13] [YAML] Avro format for PubSub. (#28899) --- sdks/python/apache_beam/yaml/yaml_io.py | 21 ++++++ sdks/python/apache_beam/yaml/yaml_io_test.py | 68 ++++++++++++++++++++ 2 files changed, 89 insertions(+) diff --git a/sdks/python/apache_beam/yaml/yaml_io.py b/sdks/python/apache_beam/yaml/yaml_io.py index 3321644ded57..b2bf150fa558 100644 --- a/sdks/python/apache_beam/yaml/yaml_io.py +++ b/sdks/python/apache_beam/yaml/yaml_io.py @@ -23,6 +23,7 @@ implementations of the same transforms, the configs must be kept in sync. """ +import io import os from typing import Any from typing import Callable @@ -32,12 +33,14 @@ from typing import Optional from typing import Tuple +import fastavro import yaml import apache_beam as beam import apache_beam.io as beam_io from apache_beam.io import ReadFromBigQuery from apache_beam.io import WriteToBigQuery +from apache_beam.io import avroio from apache_beam.io.gcp.bigquery import BigQueryDisposition from apache_beam.portability.api import schema_pb2 from apache_beam.typehints import schemas @@ -146,6 +149,13 @@ def _create_parser( elif format == 'json': beam_schema = json_utils.json_schema_to_beam_schema(schema) return beam_schema, json_utils.json_parser(beam_schema) + elif format == 'avro': + beam_schema = avroio.avro_schema_to_beam_schema(schema) + covert_to_row = avroio.avro_dict_to_beam_row(schema, beam_schema) + return ( + beam_schema, + lambda record: covert_to_row( + fastavro.schemaless_reader(io.BytesIO(record), schema))) else: raise ValueError(f'Unknown format: {format}') @@ -162,6 +172,17 @@ def _create_formatter( return lambda row: getattr(row, field_names[0]) elif format == 'json': return json_utils.json_formater(beam_schema) + elif format == 'avro': + avro_schema = schema or avroio.beam_schema_to_avro_schema(beam_schema) + from_row = avroio.beam_row_to_avro_dict(avro_schema, beam_schema) + + def formatter(row): + buffer = io.BytesIO() + fastavro.schemaless_writer(buffer, avro_schema, from_row(row)) + buffer.seek(0) + return buffer.read() + + return formatter else: raise ValueError(f'Unknown format: {format}') diff --git a/sdks/python/apache_beam/yaml/yaml_io_test.py b/sdks/python/apache_beam/yaml/yaml_io_test.py index 72675da278b0..7071860a7bf1 100644 --- a/sdks/python/apache_beam/yaml/yaml_io_test.py +++ b/sdks/python/apache_beam/yaml/yaml_io_test.py @@ -15,9 +15,12 @@ # limitations under the License. 
# +import io +import json import logging import unittest +import fastavro import mock import apache_beam as beam @@ -167,6 +170,48 @@ def test_read_with_id_attribute(self): result, equal_to([beam.Row(payload=b'msg1'), beam.Row(payload=b'msg2')])) + _avro_schema = { + 'type': 'record', + 'name': 'ec', + 'fields': [{ + 'name': 'label', 'type': 'string' + }, { + 'name': 'rank', 'type': 'int' + }] + } + + def _encode_avro(self, data): + buffer = io.BytesIO() + fastavro.schemaless_writer(buffer, self._avro_schema, data) + buffer.seek(0) + return buffer.read() + + def test_read_avro(self): + + with beam.Pipeline(options=beam.options.pipeline_options.PipelineOptions( + pickle_library='cloudpickle')) as p: + with mock.patch( + 'apache_beam.io.ReadFromPubSub', + FakeReadFromPubSub( + topic='my_topic', + messages=[PubsubMessage(self._encode_avro({'label': '37a', + 'rank': 1}), {}), + PubsubMessage(self._encode_avro({'label': '389a', + 'rank': 2}), {})])): + result = p | YamlTransform( + ''' + type: ReadFromPubSub + config: + topic: my_topic + format: avro + schema: %s + ''' % json.dumps(self._avro_schema)) + assert_that( + result, + equal_to( + [beam.Row(label='37a', rank=1), # linebreak + beam.Row(label='389a', rank=2)])) + def test_read_json(self): with beam.Pipeline(options=beam.options.pipeline_options.PipelineOptions( pickle_library='cloudpickle')) as p: @@ -346,6 +391,29 @@ def test_write_with_id_attribute(self): id_attribute: some_attr ''')) + def test_write_avro(self): + with beam.Pipeline(options=beam.options.pipeline_options.PipelineOptions( + pickle_library='cloudpickle')) as p: + with mock.patch( + 'apache_beam.io.WriteToPubSub', + FakeWriteToPubSub( + topic='my_topic', + messages=[PubsubMessage(self._encode_avro({'label': '37a', + 'rank': 1}), {}), + PubsubMessage(self._encode_avro({'label': '389a', + 'rank': 2}), {})])): + _ = ( + p | beam.Create( + [beam.Row(label='37a', rank=1), beam.Row(label='389a', rank=2)]) + | YamlTransform( + ''' + type: WriteToPubSub + input: input + config: + topic: my_topic + format: avro + ''')) + def test_write_json(self): with beam.Pipeline(options=beam.options.pipeline_options.PipelineOptions( pickle_library='cloudpickle')) as p: From fb99630d1e0924eec54239ee369b2042b15d5187 Mon Sep 17 00:00:00 2001 From: Kenneth Knowles Date: Tue, 3 Oct 2023 13:28:55 -0400 Subject: [PATCH 03/13] [Website] Announce Beam 2.51.0 --- CHANGES.md | 18 +- website/www/site/config.toml | 2 +- .../www/site/content/en/blog/beam-2.51.0.md | 210 ++++++++++++++++++ .../site/content/en/get-started/downloads.md | 14 +- 4 files changed, 226 insertions(+), 18 deletions(-) create mode 100644 website/www/site/content/en/blog/beam-2.51.0.md diff --git a/CHANGES.md b/CHANGES.md index 0c2c2e3f79f4..43ade8c530b9 100644 --- a/CHANGES.md +++ b/CHANGES.md @@ -87,16 +87,7 @@ * ([#X](https://github.com/apache/beam/issues/X)). -# [2.51.0] - Unreleased - -## Highlights - -* New highly anticipated feature X added to Python SDK ([#X](https://github.com/apache/beam/issues/X)). -* New highly anticipated feature Y added to Java SDK ([#Y](https://github.com/apache/beam/issues/Y)). - -## I/Os - -* Support for X source added (Java/Python) ([#X](https://github.com/apache/beam/issues/X)). +# [2.51.0] - 2023-10-03 ## New Features / Improvements @@ -104,6 +95,7 @@ * In Python, the [VertexAIModelHandlerJSON](https://beam.apache.org/releases/pydoc/current/apache_beam.ml.inference.vertex_ai_inference.html#apache_beam.ml.inference.vertex_ai_inference.VertexAIModelHandlerJSON) now supports passing in inference_args. 
These will be passed through to the Vertex endpoint as parameters. * Added support to run `mypy` on user pipelines ([#27906](https://github.com/apache/beam/issues/27906)) + ## Breaking Changes * Removed fastjson library dependency for Beam SQL. Table property is changed to be based on jackson ObjectNode (Java) ([#24154](https://github.com/apache/beam/issues/24154)). @@ -111,9 +103,6 @@ * Removed the parameter `t reflect.Type` from `parquetio.Write`. The element type is derived from the input PCollection (Go) ([#28490](https://github.com/apache/beam/issues/28490)) * Refactor BeamSqlSeekableTable.setUp adding a parameter joinSubsetType. [#28283](https://github.com/apache/beam/issues/28283) -## Deprecations - -* X behavior is deprecated and will be removed in X versions ([#X](https://github.com/apache/beam/issues/X)). ## Bugfixes @@ -128,7 +117,8 @@ ## Known Issues -* ([#X](https://github.com/apache/beam/issues/X)). +* Python pipelines using BigQuery Storage Read API must pin `fastavro` + dependency to 1.8.3 or earlier: [#28811](https://github.com/apache/beam/issues/28811) # [2.50.0] - 2023-08-30 diff --git a/website/www/site/config.toml b/website/www/site/config.toml index 6a1907e60591..c7b0cd3412e6 100644 --- a/website/www/site/config.toml +++ b/website/www/site/config.toml @@ -104,7 +104,7 @@ github_project_repo = "https://github.com/apache/beam" [params] description = "Apache Beam is an open source, unified model and set of language-specific SDKs for defining and executing data processing workflows, and also data ingestion and integration flows, supporting Enterprise Integration Patterns (EIPs) and Domain Specific Languages (DSLs). Dataflow pipelines simplify the mechanics of large-scale batch and streaming data processing and can run on a number of runtimes like Apache Flink, Apache Spark, and Google Cloud Dataflow (a cloud service). Beam also brings DSL in different languages, allowing users to easily implement their data integration processes." -release_latest = "2.50.0" +release_latest = "2.51.0" # The repository and branch where the files live in Github or Colab. This is used # to serve and stage from your local branch, but publish to the master branch. # e.g. https://github.com/{{< param branch_repo >}}/path/to/notebook.ipynb diff --git a/website/www/site/content/en/blog/beam-2.51.0.md b/website/www/site/content/en/blog/beam-2.51.0.md new file mode 100644 index 000000000000..aaa4142bae62 --- /dev/null +++ b/website/www/site/content/en/blog/beam-2.51.0.md @@ -0,0 +1,210 @@ +--- +title: "Apache Beam 2.51.0" +date: 2023-10-11 09:00:00 -0400 +categories: + - blog + - release +authors: + - klk +--- + + +We are happy to present the new 2.51.0 release of Beam. +This release includes both improvements and new functionality. +See the [download page](/get-started/downloads/#2510-2023-10-03) for this release. + + + +For more information on changes in 2.51.0, check out the [detailed release notes](https://github.com/apache/beam/milestone/15). + +## New Features / Improvements + +* In Python, [RunInference](https://beam.apache.org/documentation/sdks/python-machine-learning/#why-use-the-runinference-api) now supports loading many models in the same transform using a [KeyedModelHandler](https://beam.apache.org/documentation/sdks/python-machine-learning/#use-a-keyed-modelhandler) ([#27628](https://github.com/apache/beam/issues/27628)). 
+* In Python, the [VertexAIModelHandlerJSON](https://beam.apache.org/releases/pydoc/current/apache_beam.ml.inference.vertex_ai_inference.html#apache_beam.ml.inference.vertex_ai_inference.VertexAIModelHandlerJSON) now supports passing in inference_args. These will be passed through to the Vertex endpoint as parameters.
+* Added support to run `mypy` on user pipelines ([#27906](https://github.com/apache/beam/issues/27906))
+
+
+## Breaking Changes
+
+* Removed fastjson library dependency for Beam SQL. Table property is changed to be based on jackson ObjectNode (Java) ([#24154](https://github.com/apache/beam/issues/24154)).
+* Removed TensorFlow from Beam Python container images [PR](https://github.com/apache/beam/pull/28424). If you have been negatively affected by this change, please comment on [#20605](https://github.com/apache/beam/issues/20605).
+* Removed the parameter `t reflect.Type` from `parquetio.Write`. The element type is derived from the input PCollection (Go) ([#28490](https://github.com/apache/beam/issues/28490))
+* Refactor BeamSqlSeekableTable.setUp adding a parameter joinSubsetType. [#28283](https://github.com/apache/beam/issues/28283)
+
+
+## Bugfixes
+
+* Fixed exception chaining issue in GCS connector (Python) ([#26769](https://github.com/apache/beam/issues/26769#issuecomment-1700422615)).
+* Fixed streaming inserts exception handling, GoogleAPICallErrors are now retried according to retry strategy and routed to failed rows where appropriate rather than causing a pipeline error (Python) ([#21080](https://github.com/apache/beam/issues/21080)).
+* Fixed a bug in Python SDK's cross-language Bigtable sink that mishandled records that don't have an explicit timestamp set: [#28632](https://github.com/apache/beam/issues/28632).
+
+
+## Security Fixes
+* Python containers updated, fixing [CVE-2021-30474](https://nvd.nist.gov/vuln/detail/CVE-2021-30474), [CVE-2021-30475](https://nvd.nist.gov/vuln/detail/CVE-2021-30475), [CVE-2021-30473](https://nvd.nist.gov/vuln/detail/CVE-2021-30473), [CVE-2020-36133](https://nvd.nist.gov/vuln/detail/CVE-2020-36133), [CVE-2020-36131](https://nvd.nist.gov/vuln/detail/CVE-2020-36131), [CVE-2020-36130](https://nvd.nist.gov/vuln/detail/CVE-2020-36130), and [CVE-2020-36135](https://nvd.nist.gov/vuln/detail/CVE-2020-36135)
+* Used go 1.21.1 to build, fixing [CVE-2023-39320](https://security-tracker.debian.org/tracker/CVE-2023-39320)
+
+
+## Known Issues
+
+* Python pipelines using BigQuery Storage Read API must pin `fastavro` dependency to 1.8.3
+  or earlier: [#28811](https://github.com/apache/beam/issues/28811)
+
+## List of Contributors
+
+According to git shortlog, the following people contributed to the 2.51.0 release. Thank you to all contributors! 
+ +Adam Whitmore + +Ahmed Abualsaud + +Ahmet Altay + +Aleksandr Dudko + +Alexey Romanenko + +Anand Inguva + +Andrey Devyatkin + +Arvind Ram + +Arwin Tio + +BjornPrime + +Bruno Volpato + +Bulat + +Celeste Zeng + +Chamikara Jayalath + +Clay Johnson + +Damon + +Danny McCormick + +David Cavazos + +Dip Patel + +Hai Joey Tran + +Hao Xu + +Haruka Abe + +Jack Dingilian + +Jack McCluskey + +Jeff Kinard + +Jeffrey Kinard + +Joey Tran + +Johanna Öjeling + +Julien Tournay + +Kenneth Knowles + +Kerry Donny-Clark + +Mattie Fu + +Melissa Pashniak + +Michel Davit + +Moritz Mack + +Pranav Bhandari + +Rebecca Szper + +Reeba Qureshi + +Reuven Lax + +Ritesh Ghorse + +Robert Bradshaw + +Robert Burke + +Ruwann + +Ryan Tam + +Sam Rohde + +Sereana Seim + +Svetak Sundhar + +Tim Grein + +Udi Meiri + +Valentyn Tymofieiev + +Vitaly Terentyev + +Vlado Djerek + +Xinyu Liu + +Yi Hu + +Zbynek Konecny + +Zechen Jiang + +bzablocki + +caneff + +dependabot[bot] + +gDuperran + +gabry.wu + +johnjcasey + +kberezin-nshl + +kennknowles + +liferoad + +lostluck + +magicgoody + +martin trieu + +mosche + +olalamichelle + +tvalentyn + +xqhu + +Łukasz Spyra diff --git a/website/www/site/content/en/get-started/downloads.md b/website/www/site/content/en/get-started/downloads.md index 9a753dafe32e..b564a5801cd8 100644 --- a/website/www/site/content/en/get-started/downloads.md +++ b/website/www/site/content/en/get-started/downloads.md @@ -96,10 +96,18 @@ versions denoted `0.x.y`. ## Releases +### 2.51.0 (2023-10-11) +Official [source code download](https://downloads.apache.org/beam/2.51.0/apache-beam-2.51.0-source-release.zip). +[SHA-512](https://downloads.apache.org/beam/2.51.0/apache-beam-2.51.0-source-release.zip.sha512). +[signature](https://downloads.apache.org/beam/2.51.0/apache-beam-2.51.0-source-release.zip.asc). + +[Release notes](https://github.com/apache/beam/releases/tag/v2.51.0) +[Blog post](/blog/beam-2.51.0). + ### 2.50.0 (2023-08-30) -Official [source code download](https://downloads.apache.org/beam/2.50.0/apache-beam-2.50.0-source-release.zip). -[SHA-512](https://downloads.apache.org/beam/2.50.0/apache-beam-2.50.0-source-release.zip.sha512). -[signature](https://downloads.apache.org/beam/2.50.0/apache-beam-2.50.0-source-release.zip.asc). +Official [source code download](https://archive.apache.org/beam/2.50.0/apache-beam-2.50.0-source-release.zip). +[SHA-512](https://archive.apache.org/beam/2.50.0/apache-beam-2.50.0-source-release.zip.sha512). +[signature](https://archive.apache.org/beam/2.50.0/apache-beam-2.50.0-source-release.zip.asc). [Release notes](https://github.com/apache/beam/releases/tag/v2.50.0) [Blog post](/blog/beam-2.50.0). From 6b3b3eb603fbd6feccf72a093bd520c5ca973cb5 Mon Sep 17 00:00:00 2001 From: Robert Burke Date: Wed, 11 Oct 2023 19:43:06 -0700 Subject: [PATCH 04/13] Update go_tests.yml to cache (#28954) Configure caching go deps properly in Go Tests action. --- .github/workflows/go_tests.yml | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/.github/workflows/go_tests.yml b/.github/workflows/go_tests.yml index 6884dd692522..66c57d219a83 100644 --- a/.github/workflows/go_tests.yml +++ b/.github/workflows/go_tests.yml @@ -46,6 +46,8 @@ jobs: - uses: actions/setup-go@v4 with: go-version: '1.21' + cache-dependency-path: | + sdks/go.sum - name: Delete old coverage run: "cd sdks && rm -rf .coverage.txt || :" - name: Run coverage @@ -70,4 +72,4 @@ jobs: with: work-dir: ./sdks go-package: ./... 
- go-version-input: 1.21 \ No newline at end of file + go-version-input: 1.21 From 4a7c4842aceb6786040d5cd699b7916c4509bb7b Mon Sep 17 00:00:00 2001 From: Kerry Donny-Clark Date: Wed, 11 Oct 2023 23:35:25 -0400 Subject: [PATCH 05/13] Add new resource hint to all sdks for number of cpus per worker machine (#28848) * Adds new resource hint for number of cpus per worker. * Fixes minor bugs.` * Go fmt and removes unwanted .python-version file. * Adds tests. * Fixes typo. * Fixes Java tests, adds URN to parsers. * Addresses FindBugs issue with int parsing. * Applies Java formatting corrections * Adds generated go protobufs --- .../model/pipeline/v1/beam_runner_api.proto | 4 + .../model/fnexecution_v1/beam_fn_api.pb.go | 67 ++++++++++----- .../fnexecution_v1/beam_fn_api_grpc.pb.go | 2 +- .../fnexecution_v1/beam_provision_api.pb.go | 2 +- .../beam_provision_api_grpc.pb.go | 2 +- .../jobmanagement_v1/beam_artifact_api.pb.go | 2 +- .../beam_artifact_api_grpc.pb.go | 2 +- .../jobmanagement_v1/beam_expansion_api.pb.go | 2 +- .../beam_expansion_api_grpc.pb.go | 2 +- .../model/jobmanagement_v1/beam_job_api.pb.go | 2 +- .../jobmanagement_v1/beam_job_api_grpc.pb.go | 2 +- .../model/pipeline_v1/beam_runner_api.pb.go | 81 ++++++++++--------- .../pipeline_v1/beam_runner_api_grpc.pb.go | 2 +- .../beam/model/pipeline_v1/endpoints.pb.go | 2 +- .../pipeline_v1/external_transforms.pb.go | 2 +- .../pkg/beam/model/pipeline_v1/metrics.pb.go | 2 +- .../pkg/beam/model/pipeline_v1/schema.pb.go | 2 +- .../pipeline_v1/standard_window_fns.pb.go | 2 +- sdks/go/pkg/beam/options/resource/hint.go | 37 +++++++++ .../go/pkg/beam/options/resource/hint_test.go | 45 ++++++++++- .../resourcehints/ResourceHints.java | 63 +++++++++++++++ .../resourcehints/ResourceHintsTest.java | 11 ++- .../apache_beam/transforms/resources.py | 16 ++++ .../apache_beam/transforms/resources_test.py | 6 ++ 24 files changed, 281 insertions(+), 79 deletions(-) diff --git a/model/pipeline/src/main/proto/org/apache/beam/model/pipeline/v1/beam_runner_api.proto b/model/pipeline/src/main/proto/org/apache/beam/model/pipeline/v1/beam_runner_api.proto index 2483103b5794..db958f183c45 100644 --- a/model/pipeline/src/main/proto/org/apache/beam/model/pipeline/v1/beam_runner_api.proto +++ b/model/pipeline/src/main/proto/org/apache/beam/model/pipeline/v1/beam_runner_api.proto @@ -1982,5 +1982,9 @@ message StandardResourceHints { // SDKs should convert the size to bytes, but can allow users to specify human-friendly units (e.g. GiB). // Payload: ASCII encoded string of the base 10 representation of an integer number of bytes. MIN_RAM_BYTES = 1 [(beam_urn) = "beam:resources:min_ram_bytes:v1"]; + // Describes desired number of CPUs available in transform's execution environment. + // SDKs should accept and validate a positive integer count. + // Payload: ASCII encoded string of the base 10 representation of an integer number of CPUs. + CPU_COUNT = 2 [(beam_urn) = "beam:resources:cpu_count:v1"]; } } diff --git a/sdks/go/pkg/beam/model/fnexecution_v1/beam_fn_api.pb.go b/sdks/go/pkg/beam/model/fnexecution_v1/beam_fn_api.pb.go index 1d547470ea1a..9d14cff3c7d6 100644 --- a/sdks/go/pkg/beam/model/fnexecution_v1/beam_fn_api.pb.go +++ b/sdks/go/pkg/beam/model/fnexecution_v1/beam_fn_api.pb.go @@ -27,7 +27,7 @@ // Code generated by protoc-gen-go. DO NOT EDIT. 
// versions: // protoc-gen-go v1.27.1 -// protoc v4.24.0--rc1 +// protoc v4.24.4 // source: org/apache/beam/model/fn_execution/v1/beam_fn_api.proto // TODO: Consider consolidating common components in another package @@ -1883,30 +1883,57 @@ func (x *ProcessBundleSplitRequest) GetDesiredSplits() map[string]*ProcessBundle // first_residual_element. // - The current bundle, if no further splits happen, will have done exactly // the work under primary_roots and all elements up to and including the -// channel splits last_primary_element. +// channel split's last_primary_element. // // This allows the SDK to relinquish ownership of and commit to not process some // of the elements that it may have been sent (the residual) while retaining // ownership and commitment to finish the other portion (the primary). // -// For example, lets say the SDK is processing elements A B C D E and a split -// request comes in. The SDK could return a response with a channel split -// representing a last_primary_element of 3 (D) and first_residual_element of 4 -// (E). The SDK is now responsible for processing A B C D and the runner must -// process E in the future. A future split request could have the SDK split the -// elements B into B1 and B2 and C into C1 and C2 representing their primary and -// residual roots. The SDK would return a response with a channel split -// representing a last_primary_element of 0 (A) and first_residual_element of 3 -// (D) with primary_roots (B1, C1) and residual_roots (B2, C2). The SDK is now -// responsible for processing A B1 C1 and the runner must process C2 D2 (and E -// from the prior split) in the future. Yet another future split request could -// have the SDK could split B1 further into B1a and B1b primary and residuals -// and return C2 as a residual (assuming C2 was left unprocessed). The SDK would -// return a response with a channel split representing a last_primary_element of -// 0 (A) and first_residual_element of 4 (E) with primary_roots (B1a) and -// residual_roots (B1b, C1). The SDK is now responsible for processing A B1a the -// runner must process B1b C1 (in addition to C2, D, E from prior splits) in the -// future. +// Example with three splits of a single bundle: +// Let's say the SDK is processing elements [A B C D E]. These elements make +// up the 0-indexed channel. +// +// ** First Split ** +// Channel Split = [ A B C D <> E ] +// Primary Roots = [] (No elements were split) +// Residual Roots = [] +// +// Say a split request comes in. The SDK could return a response with a channel +// split representing a last_primary_element of 3 (D) and +// first_residual_element of 4 (E). The SDK is now responsible for processing A +// B C D and the runner must process E in the future. +// +// (A B C D) | (E) +// +// ** Second Split ** +// Channel Split = [ A < B C > D E ] +// Primary Roots = [B1 C1] +// Residual Roots = [B2 C2] +// +// A future split request could have the SDK split the elements B into B1 and +// B2 and C into C1 and C2 representing their primary and residual roots. The +// +// (A B1 C1) | (B2 C2 D) +// +// SDK would return a response with a channel split representing a +// last_primary_element of 0 (A) and first_residual_element of 3 (D) with +// primary_roots (B1, C1) and residual_roots (B2, C2). The SDK is now +// responsible for processing A B1 C1 and the runner must process B2 C2 D (and +// E from the prior split) in the future. 
+// +// ** Third Split ** +// Channel Split = [ A < B C > D E ] +// Primary Roots = [B1a] +// Residual Roots [B1b C1] +// Yet another future split request could have the SDK could split B1 further +// into B1a and B1b primary and residuals and return C1 as a residual (assuming +// C1 was left unprocessed). The SDK would return a response with a channel +// split representing a last_primary_element of 0 (A) and +// first_residual_element of 3 (E) with primary_roots (B1a) and residual_roots +// (B1b, C1). The SDK is now responsible for processing A B1a the runner must +// process B1b C1 (in addition to C2, D, E from prior splits) in the future. +// +// (A B1a) | (B1b C1) // // For more rigorous definitions see https://s.apache.org/beam-breaking-fusion type ProcessBundleSplitResponse struct { diff --git a/sdks/go/pkg/beam/model/fnexecution_v1/beam_fn_api_grpc.pb.go b/sdks/go/pkg/beam/model/fnexecution_v1/beam_fn_api_grpc.pb.go index ac9e402750c4..cd53ea805705 100644 --- a/sdks/go/pkg/beam/model/fnexecution_v1/beam_fn_api_grpc.pb.go +++ b/sdks/go/pkg/beam/model/fnexecution_v1/beam_fn_api_grpc.pb.go @@ -17,7 +17,7 @@ // Code generated by protoc-gen-go-grpc. DO NOT EDIT. // versions: // - protoc-gen-go-grpc v1.1.0 -// - protoc v4.24.0--rc1 +// - protoc v4.24.4 // source: org/apache/beam/model/fn_execution/v1/beam_fn_api.proto package fnexecution_v1 diff --git a/sdks/go/pkg/beam/model/fnexecution_v1/beam_provision_api.pb.go b/sdks/go/pkg/beam/model/fnexecution_v1/beam_provision_api.pb.go index a24609b2fd05..26cf245f7206 100644 --- a/sdks/go/pkg/beam/model/fnexecution_v1/beam_provision_api.pb.go +++ b/sdks/go/pkg/beam/model/fnexecution_v1/beam_provision_api.pb.go @@ -22,7 +22,7 @@ // Code generated by protoc-gen-go. DO NOT EDIT. // versions: // protoc-gen-go v1.27.1 -// protoc v4.24.0--rc1 +// protoc v4.24.4 // source: org/apache/beam/model/fn_execution/v1/beam_provision_api.proto package fnexecution_v1 diff --git a/sdks/go/pkg/beam/model/fnexecution_v1/beam_provision_api_grpc.pb.go b/sdks/go/pkg/beam/model/fnexecution_v1/beam_provision_api_grpc.pb.go index f9c6f5681399..9064b348b4c0 100644 --- a/sdks/go/pkg/beam/model/fnexecution_v1/beam_provision_api_grpc.pb.go +++ b/sdks/go/pkg/beam/model/fnexecution_v1/beam_provision_api_grpc.pb.go @@ -17,7 +17,7 @@ // Code generated by protoc-gen-go-grpc. DO NOT EDIT. // versions: // - protoc-gen-go-grpc v1.1.0 -// - protoc v4.24.0--rc1 +// - protoc v4.24.4 // source: org/apache/beam/model/fn_execution/v1/beam_provision_api.proto package fnexecution_v1 diff --git a/sdks/go/pkg/beam/model/jobmanagement_v1/beam_artifact_api.pb.go b/sdks/go/pkg/beam/model/jobmanagement_v1/beam_artifact_api.pb.go index 6a7663d77e9c..85bb2e368970 100644 --- a/sdks/go/pkg/beam/model/jobmanagement_v1/beam_artifact_api.pb.go +++ b/sdks/go/pkg/beam/model/jobmanagement_v1/beam_artifact_api.pb.go @@ -22,7 +22,7 @@ // Code generated by protoc-gen-go. DO NOT EDIT. // versions: // protoc-gen-go v1.27.1 -// protoc v4.24.0--rc1 +// protoc v4.24.4 // source: org/apache/beam/model/job_management/v1/beam_artifact_api.proto package jobmanagement_v1 diff --git a/sdks/go/pkg/beam/model/jobmanagement_v1/beam_artifact_api_grpc.pb.go b/sdks/go/pkg/beam/model/jobmanagement_v1/beam_artifact_api_grpc.pb.go index 6b381b96f3d1..28e43e21fbbd 100644 --- a/sdks/go/pkg/beam/model/jobmanagement_v1/beam_artifact_api_grpc.pb.go +++ b/sdks/go/pkg/beam/model/jobmanagement_v1/beam_artifact_api_grpc.pb.go @@ -17,7 +17,7 @@ // Code generated by protoc-gen-go-grpc. DO NOT EDIT. 
// versions: // - protoc-gen-go-grpc v1.1.0 -// - protoc v4.24.0--rc1 +// - protoc v4.24.4 // source: org/apache/beam/model/job_management/v1/beam_artifact_api.proto package jobmanagement_v1 diff --git a/sdks/go/pkg/beam/model/jobmanagement_v1/beam_expansion_api.pb.go b/sdks/go/pkg/beam/model/jobmanagement_v1/beam_expansion_api.pb.go index 0f33c7ab9e3c..8f7ca43ec0f5 100644 --- a/sdks/go/pkg/beam/model/jobmanagement_v1/beam_expansion_api.pb.go +++ b/sdks/go/pkg/beam/model/jobmanagement_v1/beam_expansion_api.pb.go @@ -22,7 +22,7 @@ // Code generated by protoc-gen-go. DO NOT EDIT. // versions: // protoc-gen-go v1.27.1 -// protoc v4.24.0--rc1 +// protoc v4.24.4 // source: org/apache/beam/model/job_management/v1/beam_expansion_api.proto package jobmanagement_v1 diff --git a/sdks/go/pkg/beam/model/jobmanagement_v1/beam_expansion_api_grpc.pb.go b/sdks/go/pkg/beam/model/jobmanagement_v1/beam_expansion_api_grpc.pb.go index e2cc3c4f77ec..f1c3782f5fb8 100644 --- a/sdks/go/pkg/beam/model/jobmanagement_v1/beam_expansion_api_grpc.pb.go +++ b/sdks/go/pkg/beam/model/jobmanagement_v1/beam_expansion_api_grpc.pb.go @@ -17,7 +17,7 @@ // Code generated by protoc-gen-go-grpc. DO NOT EDIT. // versions: // - protoc-gen-go-grpc v1.1.0 -// - protoc v4.24.0--rc1 +// - protoc v4.24.4 // source: org/apache/beam/model/job_management/v1/beam_expansion_api.proto package jobmanagement_v1 diff --git a/sdks/go/pkg/beam/model/jobmanagement_v1/beam_job_api.pb.go b/sdks/go/pkg/beam/model/jobmanagement_v1/beam_job_api.pb.go index d93130d26d9f..62e0b313ec2d 100644 --- a/sdks/go/pkg/beam/model/jobmanagement_v1/beam_job_api.pb.go +++ b/sdks/go/pkg/beam/model/jobmanagement_v1/beam_job_api.pb.go @@ -22,7 +22,7 @@ // Code generated by protoc-gen-go. DO NOT EDIT. // versions: // protoc-gen-go v1.27.1 -// protoc v4.24.0--rc1 +// protoc v4.24.4 // source: org/apache/beam/model/job_management/v1/beam_job_api.proto package jobmanagement_v1 diff --git a/sdks/go/pkg/beam/model/jobmanagement_v1/beam_job_api_grpc.pb.go b/sdks/go/pkg/beam/model/jobmanagement_v1/beam_job_api_grpc.pb.go index 08da7e4643c3..38f2c85a1c1c 100644 --- a/sdks/go/pkg/beam/model/jobmanagement_v1/beam_job_api_grpc.pb.go +++ b/sdks/go/pkg/beam/model/jobmanagement_v1/beam_job_api_grpc.pb.go @@ -17,7 +17,7 @@ // Code generated by protoc-gen-go-grpc. DO NOT EDIT. // versions: // - protoc-gen-go-grpc v1.1.0 -// - protoc v4.24.0--rc1 +// - protoc v4.24.4 // source: org/apache/beam/model/job_management/v1/beam_job_api.proto package jobmanagement_v1 diff --git a/sdks/go/pkg/beam/model/pipeline_v1/beam_runner_api.pb.go b/sdks/go/pkg/beam/model/pipeline_v1/beam_runner_api.pb.go index b20a5dccbe05..49df2b5c2e59 100644 --- a/sdks/go/pkg/beam/model/pipeline_v1/beam_runner_api.pb.go +++ b/sdks/go/pkg/beam/model/pipeline_v1/beam_runner_api.pb.go @@ -22,7 +22,7 @@ // Code generated by protoc-gen-go. DO NOT EDIT. // versions: // protoc-gen-go v1.27.1 -// protoc v4.24.0--rc1 +// protoc v4.24.4 // source: org/apache/beam/model/pipeline/v1/beam_runner_api.proto package pipeline_v1 @@ -1857,6 +1857,10 @@ const ( // SDKs should convert the size to bytes, but can allow users to specify human-friendly units (e.g. GiB). // Payload: ASCII encoded string of the base 10 representation of an integer number of bytes. StandardResourceHints_MIN_RAM_BYTES StandardResourceHints_Enum = 1 + // Describes desired number of CPUs available in transform's execution environment. + // SDKs should accept and validate a positive integer count. 
+ // Payload: ASCII encoded string of the base 10 representation of an integer number of CPUs. + StandardResourceHints_CPU_COUNT StandardResourceHints_Enum = 2 ) // Enum value maps for StandardResourceHints_Enum. @@ -1864,10 +1868,12 @@ var ( StandardResourceHints_Enum_name = map[int32]string{ 0: "ACCELERATOR", 1: "MIN_RAM_BYTES", + 2: "CPU_COUNT", } StandardResourceHints_Enum_value = map[string]int32{ "ACCELERATOR": 0, "MIN_RAM_BYTES": 1, + "CPU_COUNT": 2, } ) @@ -9223,42 +9229,45 @@ var file_org_apache_beam_model_pipeline_v1_beam_runner_api_proto_rawDesc = []byt 0x65, 0x63, 0x75, 0x74, 0x61, 0x62, 0x6c, 0x65, 0x53, 0x74, 0x61, 0x67, 0x65, 0x50, 0x61, 0x79, 0x6c, 0x6f, 0x61, 0x64, 0x2e, 0x54, 0x69, 0x6d, 0x65, 0x72, 0x49, 0x64, 0x48, 0x00, 0x52, 0x05, 0x74, 0x69, 0x6d, 0x65, 0x72, 0x42, 0x08, 0x0a, 0x06, 0x74, 0x61, 0x72, 0x67, 0x65, 0x74, 0x22, - 0x8f, 0x01, 0x0a, 0x15, 0x53, 0x74, 0x61, 0x6e, 0x64, 0x61, 0x72, 0x64, 0x52, 0x65, 0x73, 0x6f, - 0x75, 0x72, 0x63, 0x65, 0x48, 0x69, 0x6e, 0x74, 0x73, 0x22, 0x76, 0x0a, 0x04, 0x45, 0x6e, 0x75, - 0x6d, 0x12, 0x34, 0x0a, 0x0b, 0x41, 0x43, 0x43, 0x45, 0x4c, 0x45, 0x52, 0x41, 0x54, 0x4f, 0x52, - 0x10, 0x00, 0x1a, 0x23, 0xa2, 0xb4, 0xfa, 0xc2, 0x05, 0x1d, 0x62, 0x65, 0x61, 0x6d, 0x3a, 0x72, - 0x65, 0x73, 0x6f, 0x75, 0x72, 0x63, 0x65, 0x73, 0x3a, 0x61, 0x63, 0x63, 0x65, 0x6c, 0x65, 0x72, - 0x61, 0x74, 0x6f, 0x72, 0x3a, 0x76, 0x31, 0x12, 0x38, 0x0a, 0x0d, 0x4d, 0x49, 0x4e, 0x5f, 0x52, - 0x41, 0x4d, 0x5f, 0x42, 0x59, 0x54, 0x45, 0x53, 0x10, 0x01, 0x1a, 0x25, 0xa2, 0xb4, 0xfa, 0xc2, - 0x05, 0x1f, 0x62, 0x65, 0x61, 0x6d, 0x3a, 0x72, 0x65, 0x73, 0x6f, 0x75, 0x72, 0x63, 0x65, 0x73, - 0x3a, 0x6d, 0x69, 0x6e, 0x5f, 0x72, 0x61, 0x6d, 0x5f, 0x62, 0x79, 0x74, 0x65, 0x73, 0x3a, 0x76, - 0x31, 0x32, 0x8f, 0x01, 0x0a, 0x11, 0x54, 0x65, 0x73, 0x74, 0x53, 0x74, 0x72, 0x65, 0x61, 0x6d, - 0x53, 0x65, 0x72, 0x76, 0x69, 0x63, 0x65, 0x12, 0x7a, 0x0a, 0x06, 0x45, 0x76, 0x65, 0x6e, 0x74, - 0x73, 0x12, 0x30, 0x2e, 0x6f, 0x72, 0x67, 0x2e, 0x61, 0x70, 0x61, 0x63, 0x68, 0x65, 0x2e, 0x62, - 0x65, 0x61, 0x6d, 0x2e, 0x6d, 0x6f, 0x64, 0x65, 0x6c, 0x2e, 0x70, 0x69, 0x70, 0x65, 0x6c, 0x69, - 0x6e, 0x65, 0x2e, 0x76, 0x31, 0x2e, 0x45, 0x76, 0x65, 0x6e, 0x74, 0x73, 0x52, 0x65, 0x71, 0x75, - 0x65, 0x73, 0x74, 0x1a, 0x3a, 0x2e, 0x6f, 0x72, 0x67, 0x2e, 0x61, 0x70, 0x61, 0x63, 0x68, 0x65, + 0xc2, 0x01, 0x0a, 0x15, 0x53, 0x74, 0x61, 0x6e, 0x64, 0x61, 0x72, 0x64, 0x52, 0x65, 0x73, 0x6f, + 0x75, 0x72, 0x63, 0x65, 0x48, 0x69, 0x6e, 0x74, 0x73, 0x22, 0xa8, 0x01, 0x0a, 0x04, 0x45, 0x6e, + 0x75, 0x6d, 0x12, 0x34, 0x0a, 0x0b, 0x41, 0x43, 0x43, 0x45, 0x4c, 0x45, 0x52, 0x41, 0x54, 0x4f, + 0x52, 0x10, 0x00, 0x1a, 0x23, 0xa2, 0xb4, 0xfa, 0xc2, 0x05, 0x1d, 0x62, 0x65, 0x61, 0x6d, 0x3a, + 0x72, 0x65, 0x73, 0x6f, 0x75, 0x72, 0x63, 0x65, 0x73, 0x3a, 0x61, 0x63, 0x63, 0x65, 0x6c, 0x65, + 0x72, 0x61, 0x74, 0x6f, 0x72, 0x3a, 0x76, 0x31, 0x12, 0x38, 0x0a, 0x0d, 0x4d, 0x49, 0x4e, 0x5f, + 0x52, 0x41, 0x4d, 0x5f, 0x42, 0x59, 0x54, 0x45, 0x53, 0x10, 0x01, 0x1a, 0x25, 0xa2, 0xb4, 0xfa, + 0xc2, 0x05, 0x1f, 0x62, 0x65, 0x61, 0x6d, 0x3a, 0x72, 0x65, 0x73, 0x6f, 0x75, 0x72, 0x63, 0x65, + 0x73, 0x3a, 0x6d, 0x69, 0x6e, 0x5f, 0x72, 0x61, 0x6d, 0x5f, 0x62, 0x79, 0x74, 0x65, 0x73, 0x3a, + 0x76, 0x31, 0x12, 0x30, 0x0a, 0x09, 0x43, 0x50, 0x55, 0x5f, 0x43, 0x4f, 0x55, 0x4e, 0x54, 0x10, + 0x02, 0x1a, 0x21, 0xa2, 0xb4, 0xfa, 0xc2, 0x05, 0x1b, 0x62, 0x65, 0x61, 0x6d, 0x3a, 0x72, 0x65, + 0x73, 0x6f, 0x75, 0x72, 0x63, 0x65, 0x73, 0x3a, 0x63, 0x70, 0x75, 0x5f, 0x63, 0x6f, 0x75, 0x6e, + 0x74, 0x3a, 0x76, 0x31, 0x32, 0x8f, 0x01, 0x0a, 
0x11, 0x54, 0x65, 0x73, 0x74, 0x53, 0x74, 0x72, + 0x65, 0x61, 0x6d, 0x53, 0x65, 0x72, 0x76, 0x69, 0x63, 0x65, 0x12, 0x7a, 0x0a, 0x06, 0x45, 0x76, + 0x65, 0x6e, 0x74, 0x73, 0x12, 0x30, 0x2e, 0x6f, 0x72, 0x67, 0x2e, 0x61, 0x70, 0x61, 0x63, 0x68, + 0x65, 0x2e, 0x62, 0x65, 0x61, 0x6d, 0x2e, 0x6d, 0x6f, 0x64, 0x65, 0x6c, 0x2e, 0x70, 0x69, 0x70, + 0x65, 0x6c, 0x69, 0x6e, 0x65, 0x2e, 0x76, 0x31, 0x2e, 0x45, 0x76, 0x65, 0x6e, 0x74, 0x73, 0x52, + 0x65, 0x71, 0x75, 0x65, 0x73, 0x74, 0x1a, 0x3a, 0x2e, 0x6f, 0x72, 0x67, 0x2e, 0x61, 0x70, 0x61, + 0x63, 0x68, 0x65, 0x2e, 0x62, 0x65, 0x61, 0x6d, 0x2e, 0x6d, 0x6f, 0x64, 0x65, 0x6c, 0x2e, 0x70, + 0x69, 0x70, 0x65, 0x6c, 0x69, 0x6e, 0x65, 0x2e, 0x76, 0x31, 0x2e, 0x54, 0x65, 0x73, 0x74, 0x53, + 0x74, 0x72, 0x65, 0x61, 0x6d, 0x50, 0x61, 0x79, 0x6c, 0x6f, 0x61, 0x64, 0x2e, 0x45, 0x76, 0x65, + 0x6e, 0x74, 0x22, 0x00, 0x30, 0x01, 0x3a, 0x3f, 0x0a, 0x08, 0x62, 0x65, 0x61, 0x6d, 0x5f, 0x75, + 0x72, 0x6e, 0x12, 0x21, 0x2e, 0x67, 0x6f, 0x6f, 0x67, 0x6c, 0x65, 0x2e, 0x70, 0x72, 0x6f, 0x74, + 0x6f, 0x62, 0x75, 0x66, 0x2e, 0x45, 0x6e, 0x75, 0x6d, 0x56, 0x61, 0x6c, 0x75, 0x65, 0x4f, 0x70, + 0x74, 0x69, 0x6f, 0x6e, 0x73, 0x18, 0xc4, 0xa6, 0xaf, 0x58, 0x20, 0x01, 0x28, 0x09, 0x52, 0x07, + 0x62, 0x65, 0x61, 0x6d, 0x55, 0x72, 0x6e, 0x3a, 0x49, 0x0a, 0x0d, 0x62, 0x65, 0x61, 0x6d, 0x5f, + 0x63, 0x6f, 0x6e, 0x73, 0x74, 0x61, 0x6e, 0x74, 0x12, 0x21, 0x2e, 0x67, 0x6f, 0x6f, 0x67, 0x6c, + 0x65, 0x2e, 0x70, 0x72, 0x6f, 0x74, 0x6f, 0x62, 0x75, 0x66, 0x2e, 0x45, 0x6e, 0x75, 0x6d, 0x56, + 0x61, 0x6c, 0x75, 0x65, 0x4f, 0x70, 0x74, 0x69, 0x6f, 0x6e, 0x73, 0x18, 0xc5, 0xa6, 0xaf, 0x58, + 0x20, 0x01, 0x28, 0x09, 0x52, 0x0c, 0x62, 0x65, 0x61, 0x6d, 0x43, 0x6f, 0x6e, 0x73, 0x74, 0x61, + 0x6e, 0x74, 0x42, 0x78, 0x0a, 0x21, 0x6f, 0x72, 0x67, 0x2e, 0x61, 0x70, 0x61, 0x63, 0x68, 0x65, 0x2e, 0x62, 0x65, 0x61, 0x6d, 0x2e, 0x6d, 0x6f, 0x64, 0x65, 0x6c, 0x2e, 0x70, 0x69, 0x70, 0x65, - 0x6c, 0x69, 0x6e, 0x65, 0x2e, 0x76, 0x31, 0x2e, 0x54, 0x65, 0x73, 0x74, 0x53, 0x74, 0x72, 0x65, - 0x61, 0x6d, 0x50, 0x61, 0x79, 0x6c, 0x6f, 0x61, 0x64, 0x2e, 0x45, 0x76, 0x65, 0x6e, 0x74, 0x22, - 0x00, 0x30, 0x01, 0x3a, 0x3f, 0x0a, 0x08, 0x62, 0x65, 0x61, 0x6d, 0x5f, 0x75, 0x72, 0x6e, 0x12, - 0x21, 0x2e, 0x67, 0x6f, 0x6f, 0x67, 0x6c, 0x65, 0x2e, 0x70, 0x72, 0x6f, 0x74, 0x6f, 0x62, 0x75, - 0x66, 0x2e, 0x45, 0x6e, 0x75, 0x6d, 0x56, 0x61, 0x6c, 0x75, 0x65, 0x4f, 0x70, 0x74, 0x69, 0x6f, - 0x6e, 0x73, 0x18, 0xc4, 0xa6, 0xaf, 0x58, 0x20, 0x01, 0x28, 0x09, 0x52, 0x07, 0x62, 0x65, 0x61, - 0x6d, 0x55, 0x72, 0x6e, 0x3a, 0x49, 0x0a, 0x0d, 0x62, 0x65, 0x61, 0x6d, 0x5f, 0x63, 0x6f, 0x6e, - 0x73, 0x74, 0x61, 0x6e, 0x74, 0x12, 0x21, 0x2e, 0x67, 0x6f, 0x6f, 0x67, 0x6c, 0x65, 0x2e, 0x70, - 0x72, 0x6f, 0x74, 0x6f, 0x62, 0x75, 0x66, 0x2e, 0x45, 0x6e, 0x75, 0x6d, 0x56, 0x61, 0x6c, 0x75, - 0x65, 0x4f, 0x70, 0x74, 0x69, 0x6f, 0x6e, 0x73, 0x18, 0xc5, 0xa6, 0xaf, 0x58, 0x20, 0x01, 0x28, - 0x09, 0x52, 0x0c, 0x62, 0x65, 0x61, 0x6d, 0x43, 0x6f, 0x6e, 0x73, 0x74, 0x61, 0x6e, 0x74, 0x42, - 0x78, 0x0a, 0x21, 0x6f, 0x72, 0x67, 0x2e, 0x61, 0x70, 0x61, 0x63, 0x68, 0x65, 0x2e, 0x62, 0x65, - 0x61, 0x6d, 0x2e, 0x6d, 0x6f, 0x64, 0x65, 0x6c, 0x2e, 0x70, 0x69, 0x70, 0x65, 0x6c, 0x69, 0x6e, - 0x65, 0x2e, 0x76, 0x31, 0x42, 0x09, 0x52, 0x75, 0x6e, 0x6e, 0x65, 0x72, 0x41, 0x70, 0x69, 0x5a, - 0x48, 0x67, 0x69, 0x74, 0x68, 0x75, 0x62, 0x2e, 0x63, 0x6f, 0x6d, 0x2f, 0x61, 0x70, 0x61, 0x63, - 0x68, 0x65, 0x2f, 0x62, 0x65, 0x61, 0x6d, 0x2f, 0x73, 0x64, 0x6b, 0x73, 0x2f, 0x76, 0x32, 0x2f, - 0x67, 0x6f, 0x2f, 0x70, 0x6b, 0x67, 0x2f, 0x62, 0x65, 0x61, 0x6d, 0x2f, 
0x6d, 0x6f, 0x64, 0x65, - 0x6c, 0x2f, 0x70, 0x69, 0x70, 0x65, 0x6c, 0x69, 0x6e, 0x65, 0x5f, 0x76, 0x31, 0x3b, 0x70, 0x69, - 0x70, 0x65, 0x6c, 0x69, 0x6e, 0x65, 0x5f, 0x76, 0x31, 0x62, 0x06, 0x70, 0x72, 0x6f, 0x74, 0x6f, - 0x33, + 0x6c, 0x69, 0x6e, 0x65, 0x2e, 0x76, 0x31, 0x42, 0x09, 0x52, 0x75, 0x6e, 0x6e, 0x65, 0x72, 0x41, + 0x70, 0x69, 0x5a, 0x48, 0x67, 0x69, 0x74, 0x68, 0x75, 0x62, 0x2e, 0x63, 0x6f, 0x6d, 0x2f, 0x61, + 0x70, 0x61, 0x63, 0x68, 0x65, 0x2f, 0x62, 0x65, 0x61, 0x6d, 0x2f, 0x73, 0x64, 0x6b, 0x73, 0x2f, + 0x76, 0x32, 0x2f, 0x67, 0x6f, 0x2f, 0x70, 0x6b, 0x67, 0x2f, 0x62, 0x65, 0x61, 0x6d, 0x2f, 0x6d, + 0x6f, 0x64, 0x65, 0x6c, 0x2f, 0x70, 0x69, 0x70, 0x65, 0x6c, 0x69, 0x6e, 0x65, 0x5f, 0x76, 0x31, + 0x3b, 0x70, 0x69, 0x70, 0x65, 0x6c, 0x69, 0x6e, 0x65, 0x5f, 0x76, 0x31, 0x62, 0x06, 0x70, 0x72, + 0x6f, 0x74, 0x6f, 0x33, } var ( diff --git a/sdks/go/pkg/beam/model/pipeline_v1/beam_runner_api_grpc.pb.go b/sdks/go/pkg/beam/model/pipeline_v1/beam_runner_api_grpc.pb.go index d5e65f7b768d..20a30cf4dd01 100644 --- a/sdks/go/pkg/beam/model/pipeline_v1/beam_runner_api_grpc.pb.go +++ b/sdks/go/pkg/beam/model/pipeline_v1/beam_runner_api_grpc.pb.go @@ -17,7 +17,7 @@ // Code generated by protoc-gen-go-grpc. DO NOT EDIT. // versions: // - protoc-gen-go-grpc v1.1.0 -// - protoc v4.24.0--rc1 +// - protoc v4.24.4 // source: org/apache/beam/model/pipeline/v1/beam_runner_api.proto package pipeline_v1 diff --git a/sdks/go/pkg/beam/model/pipeline_v1/endpoints.pb.go b/sdks/go/pkg/beam/model/pipeline_v1/endpoints.pb.go index 74348ddc3b33..2dfaffa2bff0 100644 --- a/sdks/go/pkg/beam/model/pipeline_v1/endpoints.pb.go +++ b/sdks/go/pkg/beam/model/pipeline_v1/endpoints.pb.go @@ -21,7 +21,7 @@ // Code generated by protoc-gen-go. DO NOT EDIT. // versions: // protoc-gen-go v1.27.1 -// protoc v4.24.0--rc1 +// protoc v4.24.4 // source: org/apache/beam/model/pipeline/v1/endpoints.proto package pipeline_v1 diff --git a/sdks/go/pkg/beam/model/pipeline_v1/external_transforms.pb.go b/sdks/go/pkg/beam/model/pipeline_v1/external_transforms.pb.go index 0bc21a56685e..edbe82264f5e 100644 --- a/sdks/go/pkg/beam/model/pipeline_v1/external_transforms.pb.go +++ b/sdks/go/pkg/beam/model/pipeline_v1/external_transforms.pb.go @@ -21,7 +21,7 @@ // Code generated by protoc-gen-go. DO NOT EDIT. // versions: // protoc-gen-go v1.27.1 -// protoc v4.24.0--rc1 +// protoc v4.24.4 // source: org/apache/beam/model/pipeline/v1/external_transforms.proto package pipeline_v1 diff --git a/sdks/go/pkg/beam/model/pipeline_v1/metrics.pb.go b/sdks/go/pkg/beam/model/pipeline_v1/metrics.pb.go index ec3e0d704a80..60edad2363be 100644 --- a/sdks/go/pkg/beam/model/pipeline_v1/metrics.pb.go +++ b/sdks/go/pkg/beam/model/pipeline_v1/metrics.pb.go @@ -21,7 +21,7 @@ // Code generated by protoc-gen-go. DO NOT EDIT. // versions: // protoc-gen-go v1.27.1 -// protoc v4.24.0--rc1 +// protoc v4.24.4 // source: org/apache/beam/model/pipeline/v1/metrics.proto package pipeline_v1 diff --git a/sdks/go/pkg/beam/model/pipeline_v1/schema.pb.go b/sdks/go/pkg/beam/model/pipeline_v1/schema.pb.go index 717fbbfb7a69..4bc6a57044cd 100644 --- a/sdks/go/pkg/beam/model/pipeline_v1/schema.pb.go +++ b/sdks/go/pkg/beam/model/pipeline_v1/schema.pb.go @@ -24,7 +24,7 @@ // Code generated by protoc-gen-go. DO NOT EDIT. 
// versions: // protoc-gen-go v1.27.1 -// protoc v4.24.0--rc1 +// protoc v4.24.4 // source: org/apache/beam/model/pipeline/v1/schema.proto package pipeline_v1 diff --git a/sdks/go/pkg/beam/model/pipeline_v1/standard_window_fns.pb.go b/sdks/go/pkg/beam/model/pipeline_v1/standard_window_fns.pb.go index dccd7d427503..e0522806df73 100644 --- a/sdks/go/pkg/beam/model/pipeline_v1/standard_window_fns.pb.go +++ b/sdks/go/pkg/beam/model/pipeline_v1/standard_window_fns.pb.go @@ -22,7 +22,7 @@ // Code generated by protoc-gen-go. DO NOT EDIT. // versions: // protoc-gen-go v1.27.1 -// protoc v4.24.0--rc1 +// protoc v4.24.4 // source: org/apache/beam/model/pipeline/v1/standard_window_fns.proto package pipeline_v1 diff --git a/sdks/go/pkg/beam/options/resource/hint.go b/sdks/go/pkg/beam/options/resource/hint.go index 1538fe65def2..d823f4feafa9 100644 --- a/sdks/go/pkg/beam/options/resource/hint.go +++ b/sdks/go/pkg/beam/options/resource/hint.go @@ -196,3 +196,40 @@ func (h acceleratorHint) MergeWithOuter(outer Hint) Hint { func (h acceleratorHint) String() string { return fmt.Sprintf("accelerator=%v", h.value) } + +// CPUCount hints that this scope should be put in a machine with at least this many CPUs or vCPUs. +// +// Hints are advisory only and runners may not respect them. +// +// See https://beam.apache.org/documentation/runtime/resource-hints/ for more information about +// resource hints. +func CPUCount(v uint64) Hint { + return CPUCountHint{value: uint64(v)} +} + +type CPUCountHint struct { + value uint64 +} + +func (CPUCountHint) URN() string { + return "beam:resources:cpu_count:v1" +} + +func (h CPUCountHint) Payload() []byte { + // Go strings are utf8, and if the string is ascii, + // byte conversion handles that directly. + return []byte(strconv.FormatUint(h.value, 10)) +} + +// MergeWithOuter by keeping the maximum of the two cpu counts. +func (h CPUCountHint) MergeWithOuter(outer Hint) Hint { + // Intentional runtime panic from type assertion to catch hint merge errors. + if outer.(CPUCountHint).value > h.value { + return outer + } + return h +} + +func (h CPUCountHint) String() string { + return fmt.Sprintf("cpu_count=%v", humanize.Bytes(uint64(h.value))) +} diff --git a/sdks/go/pkg/beam/options/resource/hint_test.go b/sdks/go/pkg/beam/options/resource/hint_test.go index cf24b47b6c91..7c2a1df79294 100644 --- a/sdks/go/pkg/beam/options/resource/hint_test.go +++ b/sdks/go/pkg/beam/options/resource/hint_test.go @@ -111,6 +111,38 @@ func TestParseMinRAMHint_panic(t *testing.T) { ParseMinRAM("a bad byte string") } +func TestCPUCountHint_MergeWith(t *testing.T) { + low := CPUCountHint{value: 2} + high := CPUCountHint{value: 128} + + if got, want := low.MergeWithOuter(high), high; got != want { + t.Errorf("%v.MergeWith(%v) = %v, want %v", low, high, got, want) + } + if got, want := high.MergeWithOuter(low), high; got != want { + t.Errorf("%v.MergeWith(%v) = %v, want %v", high, low, got, want) + } +} + +func TestCPUCountHint_Payload(t *testing.T) { + tests := []struct { + value uint64 + payload string + }{ + {0, "0"}, + {2, "2"}, + {11, "11"}, + {2003, "2003"}, + {1.2e7, "12000000"}, + } + + for _, test := range tests { + h := CPUCountHint{value: test.value} + if got, want := h.Payload(), []byte(test.payload); !bytes.Equal(got, want) { + t.Errorf("%v.Payload() = %v, want %v", h, got, want) + } + } +} + // We copy the URN from the proto for use as a constant rather than perform a direct look up // each time, or increase initialization time. 
However we do need to validate that they are // correct, and match the standard hint urns, so that's done here. @@ -130,7 +162,11 @@ func TestStandardHintUrns(t *testing.T) { }, { h: MinRAMBytes(2e9), urn: getStandardURN(pipepb.StandardResourceHints_MIN_RAM_BYTES), + }, { + h: CPUCount(4), + urn: getStandardURN(pipepb.StandardResourceHints_CPU_COUNT), }} + for _, test := range tests { if got, want := test.h.URN(), test.urn; got != want { t.Errorf("Checked urn for %T, got %q, want %q", test.h, got, want) @@ -154,12 +190,12 @@ func (h customHint) MergeWithOuter(outer Hint) Hint { } func TestHints_Equal(t *testing.T) { - hs := NewHints(MinRAMBytes(2e9), Accelerator("type:pants;count1;install-pajamas")) + hs := NewHints(MinRAMBytes(2e9), Accelerator("type:pants;count1;install-pajamas"), CPUCount(4)) if got, want := hs.Equal(hs), true; got != want { t.Errorf("Self equal test: hs.Equal(hs) = %v, want %v", got, want) } - eq := NewHints(MinRAMBytes(2e9), Accelerator("type:pants;count1;install-pajamas")) + eq := NewHints(MinRAMBytes(2e9), Accelerator("type:pants;count1;install-pajamas"), CPUCount(4)) if got, want := hs.Equal(eq), true; got != want { t.Errorf("identical equal test: hs.Equal(eq) = %v, want %v", got, want) } @@ -223,12 +259,13 @@ func TestHints_MergeWithOuter(t *testing.T) { func TestHints_Payloads(t *testing.T) { { - hs := NewHints(MinRAMBytes(2e9), Accelerator("type:jeans;count1;")) + hs := NewHints(MinRAMBytes(2e9), Accelerator("type:jeans;count1;"), CPUCount(4)) got := hs.Payloads() want := map[string][]byte{ "beam:resources:min_ram_bytes:v1": []byte("2000000000"), "beam:resources:accelerator:v1": []byte("type:jeans;count1;"), + "beam:resources:cpu_count:v1": []byte("4"), } if !reflect.DeepEqual(got, want) { t.Errorf("hs.Payloads() = %v, want %v", got, want) @@ -248,7 +285,7 @@ func TestHints_Payloads(t *testing.T) { func TestHints_NilHints(t *testing.T) { var hs1, hs2 Hints - hs := NewHints(MinRAMBytes(2e9), Accelerator("type:pants;count1;install-pajamas")) + hs := NewHints(MinRAMBytes(2e9), Accelerator("type:pants;count1;install-pajamas"), CPUCount(4)) if got, want := hs1.Equal(hs2), true; got != want { t.Errorf("nils equal test: (nil).Equal(nil) = %v, want %v", got, want) diff --git a/sdks/java/core/src/main/java/org/apache/beam/sdk/transforms/resourcehints/ResourceHints.java b/sdks/java/core/src/main/java/org/apache/beam/sdk/transforms/resourcehints/ResourceHints.java index afd6a6ccb151..85cb2df9deab 100644 --- a/sdks/java/core/src/main/java/org/apache/beam/sdk/transforms/resourcehints/ResourceHints.java +++ b/sdks/java/core/src/main/java/org/apache/beam/sdk/transforms/resourcehints/ResourceHints.java @@ -49,6 +49,8 @@ public class ResourceHints { private static final String MIN_RAM_URN = "beam:resources:min_ram_bytes:v1"; private static final String ACCELERATOR_URN = "beam:resources:accelerator:v1"; + private static final String CPU_COUNT_URN = "beam:resources:cpu_count:v1"; + // TODO: reference this from a common location in all packages that use this. 
private static String getUrn(ProtocolMessageEnum value) { return value.getValueDescriptor().getOptions().getExtension(RunnerApi.beamUrn); @@ -57,6 +59,7 @@ private static String getUrn(ProtocolMessageEnum value) { static { checkState(MIN_RAM_URN.equals(getUrn(StandardResourceHints.Enum.MIN_RAM_BYTES))); checkState(ACCELERATOR_URN.equals(getUrn(StandardResourceHints.Enum.ACCELERATOR))); + checkState(CPU_COUNT_URN.equals(getUrn(StandardResourceHints.Enum.CPU_COUNT))); } private static ImmutableMap hintNameToUrn = @@ -64,12 +67,15 @@ private static String getUrn(ProtocolMessageEnum value) { .put("minRam", MIN_RAM_URN) .put("min_ram", MIN_RAM_URN) // Courtesy alias. .put("accelerator", ACCELERATOR_URN) + .put("cpuCount", CPU_COUNT_URN) + .put("cpu_count", CPU_COUNT_URN) // Courtesy alias. .build(); private static ImmutableMap> parsers = ImmutableMap.>builder() .put(MIN_RAM_URN, s -> new BytesHint(BytesHint.parse(s))) .put(ACCELERATOR_URN, s -> new StringHint(s)) + .put(CPU_COUNT_URN, s -> new IntHint(IntHint.parse(s))) .build(); private static final ResourceHints EMPTY = new ResourceHints(ImmutableMap.of()); @@ -212,6 +218,46 @@ public int hashCode() { } } + /*package*/ static class IntHint extends ResourceHint { + private final int value; + + @Override + public boolean equals(@Nullable Object other) { + if (other == null) { + return false; + } else if (this == other) { + return true; + } else if (other instanceof IntHint) { + return ((IntHint) other).value == value; + } else { + return false; + } + } + + @Override + public int hashCode() { + return Integer.hashCode(value); + } + + public IntHint(int value) { + this.value = value; + } + + public static int parse(String s) { + return Integer.parseInt(s, 10); + } + + @Override + public ResourceHint mergeWithOuter(ResourceHint outer) { + return new IntHint(Math.max(value, ((IntHint) outer).value)); + } + + @Override + public byte[] toBytes() { + return String.valueOf(value).getBytes(Charsets.US_ASCII); + } + } + /** * Sets desired minimal available RAM size to have in transform's execution environment. * @@ -264,6 +310,23 @@ public ResourceHints withHint(String urn, ResourceHint hint) { return new ResourceHints(newHints.build()); } + /** + * Sets desired minimal CPU or vCPU count to have in transform's execution environment. + * + * @param cpuCount specifies a positive CPU count. + */ + public ResourceHints withCPUCount(int cpuCount) { + if (cpuCount <= 0) { + LOG.error( + "Encountered invalid non-positive cpu count hint value {}.\n" + + "The value is ignored. 
In the future, The method will require an object Long type " + + "and throw an IllegalArgumentException for invalid values.", + cpuCount); + return this; + } + return withHint(CPU_COUNT_URN, new IntHint(cpuCount)); + } + public Map hints() { return hints; } diff --git a/sdks/java/core/src/test/java/org/apache/beam/sdk/transforms/resourcehints/ResourceHintsTest.java b/sdks/java/core/src/test/java/org/apache/beam/sdk/transforms/resourcehints/ResourceHintsTest.java index 3cc522176374..c7643f718aa5 100644 --- a/sdks/java/core/src/test/java/org/apache/beam/sdk/transforms/resourcehints/ResourceHintsTest.java +++ b/sdks/java/core/src/test/java/org/apache/beam/sdk/transforms/resourcehints/ResourceHintsTest.java @@ -92,10 +92,13 @@ public void testFromOptions() { .withHint("beam:resources:bar", new ResourceHints.StringHint("foo"))); options = PipelineOptionsFactory.fromArgs( - "--resourceHints=min_ram=1KB", "--resourceHints=accelerator=foo") + "--resourceHints=min_ram=1KB", + "--resourceHints=accelerator=foo", + "--resourceHints=cpu_count=4") .as(ResourceHintsOptions.class); - assertEquals( - ResourceHints.fromOptions(options), - ResourceHints.create().withMinRam(1000).withAccelerator("foo")); + ResourceHints fromOptions = ResourceHints.fromOptions(options); + ResourceHints expect = + ResourceHints.create().withMinRam(1000).withAccelerator("foo").withCPUCount(4); + assertEquals(fromOptions, expect); } } diff --git a/sdks/python/apache_beam/transforms/resources.py b/sdks/python/apache_beam/transforms/resources.py index 7bb202ab5660..7c4160df8edd 100644 --- a/sdks/python/apache_beam/transforms/resources.py +++ b/sdks/python/apache_beam/transforms/resources.py @@ -42,6 +42,7 @@ 'ResourceHint', 'AcceleratorHint', 'MinRamHint', + 'CpuCountHint', 'merge_resource_hints', 'parse_resource_hints', 'resource_hints_from_options', @@ -177,6 +178,21 @@ def get_merged_value( ResourceHint.register_resource_hint('minRam', MinRamHint) +class CpuCountHint(ResourceHint): + """Describes number of CPUs available in transform's execution environment.""" + urn = resource_hints.CPU_COUNT.urn + + @classmethod + def get_merged_value( + cls, outer_value, inner_value): # type: (bytes, bytes) -> bytes + return ResourceHint._use_max(outer_value, inner_value) + + +ResourceHint.register_resource_hint('cpu_count', CpuCountHint) +# Alias for interoperability with SDKs preferring camelCase. 
+ResourceHint.register_resource_hint('cpuCount', CpuCountHint)
+
+
 def parse_resource_hints(hints):  # type: (Dict[Any, Any]) -> Dict[str, bytes]
   parsed_hints = {}
   for hint, value in hints.items():
diff --git a/sdks/python/apache_beam/transforms/resources_test.py b/sdks/python/apache_beam/transforms/resources_test.py
index 939391b7adcb..939bdcd62651 100644
--- a/sdks/python/apache_beam/transforms/resources_test.py
+++ b/sdks/python/apache_beam/transforms/resources_test.py
@@ -46,6 +46,11 @@ class ResourcesTest(unittest.TestCase):
           val='gpu',
           urn='beam:resources:accelerator:v1',
           bytestr=b'gpu'),
+      param(
+          name='cpu_count',
+          val='4',
+          urn='beam:resources:cpu_count:v1',
+          bytestr=b'4'),
   ])
   def test_known_resource_hints(self, name, val, urn, bytestr):
     t = PTransform()
@@ -56,6 +61,7 @@ def test_known_resource_hints(self, name, val, urn, bytestr):
   @parameterized.expand([
       param(name='min_ram', val='3,500G'),
       param(name='accelerator', val=1),
+      param(name='cpu_count', val=1),
       param(name='unknown_hint', val=1)
   ])
   def test_resource_hint_parsing_fails_early(self, name, val):

From 835bd655948d9a8ebc47277dedb25fb4c619f239 Mon Sep 17 00:00:00 2001
From: Danny McCormick
Date: Thu, 12 Oct 2023 09:25:37 -0400
Subject: [PATCH 06/13] Add docs for per key inference (#28243)

* Update KeyMhMapping to KeyModelMapping

* Add docs for per key inference

* Add piece on memory thrashing

* Whitespace

* Update wording based on feedback

* Add references to website in pydoc

* Apply suggestions from code review

Co-authored-by: Rebecca Szper <98840847+rszper@users.noreply.github.com>

* Remove ordering implied by wording

* Lint fixes

---------

Co-authored-by: Rebecca Szper <98840847+rszper@users.noreply.github.com>
---
 sdks/python/apache_beam/ml/inference/base.py |  9 +++-
 .../sdks/python-machine-learning.md          | 48 +++++++++++++++++++
 2 files changed, 56 insertions(+), 1 deletion(-)

diff --git a/sdks/python/apache_beam/ml/inference/base.py b/sdks/python/apache_beam/ml/inference/base.py
index 90d43cfddb94..753e1468137c 100644
--- a/sdks/python/apache_beam/ml/inference/base.py
+++ b/sdks/python/apache_beam/ml/inference/base.py
@@ -482,6 +482,12 @@ def __init__(
       from the cohort. When model updates occur, the metrics will be reported in
       the form `--`.
 
+    Loading multiple models at the same time can increase the risk of an out of
+    memory (OOM) exception. To avoid this issue, use the parameter
+    `max_models_per_worker_hint` to limit the number of models that are loaded
+    at the same time. For more information about memory management, see
+    `Use a keyed ModelHandler <https://beam.apache.org/documentation/sdks/python-machine-learning/#use-a-keyed-modelhandler>`_. # pylint: disable=line-too-long
+
     Args:
       unkeyed: Either (a) an implementation of ModelHandler that does not
@@ -491,7 +497,8 @@
         models can be held in memory at one time per worker process. For
         example, if your worker has 8 GB of memory provisioned and your workers
         take up 1 GB each, you should set this to 7 to allow all models to sit
-        in memory with some buffer.
+        in memory with some buffer. For more information about memory management,
+        see `Use a keyed ModelHandler <https://beam.apache.org/documentation/sdks/python-machine-learning/#use-a-keyed-modelhandler>`_. 
# pylint: disable=line-too-long
     """
     self._metrics_collectors: Dict[str, _MetricsCollector] = {}
     self._default_metrics_collector: _MetricsCollector = None
diff --git a/website/www/site/content/en/documentation/sdks/python-machine-learning.md b/website/www/site/content/en/documentation/sdks/python-machine-learning.md
index 5e0cf483ff3e..0076fa370b0f 100644
--- a/website/www/site/content/en/documentation/sdks/python-machine-learning.md
+++ b/website/www/site/content/en/documentation/sdks/python-machine-learning.md
@@ -215,6 +215,54 @@ with pipeline as p:
 
 If you are unsure if your data is keyed, you can also use `MaybeKeyedModelHandler`.
 
+You can also use a `KeyedModelHandler` to load several different models based on their associated key:
+
+```
+from apache_beam.ml.inference.base import KeyedModelHandler
+keyed_model_handler = KeyedModelHandler([
+  KeyModelMapping(['key1'], PytorchModelHandlerTensor(<config1>)),
+  KeyModelMapping(['key2', 'key3'], PytorchModelHandlerTensor(<config2>))
+])
+with pipeline as p:
+  data = p | beam.Create([
+     ('key1', torch.tensor([[1,2,3],[4,5,6],...])),
+     ('key2', torch.tensor([[1,2,3],[4,5,6],...])),
+     ('key3', torch.tensor([[1,2,3],[4,5,6],...])),
+  ])
+  predictions = data | RunInference(keyed_model_handler)
+```
+
+The previous example loads a model by using `config1`. That model is then used for inference for all examples associated
+with `key1`. It also loads a model by using `config2`. That model is used for all examples associated with `key2` and `key3`.
+
+Loading multiple models at the same time increases the risk of out of memory (OOM) errors. By default, `KeyedModelHandler` doesn't
+limit the number of models loaded into memory at the same time. If the models don't all fit into memory,
+your pipeline will likely fail with an out of memory error. To avoid this issue, provide a hint about the
+maximum number of models that can be loaded at the same time.
+
+```
+mhs = [
+  KeyModelMapping(['key1'], PytorchModelHandlerTensor(<config1>)),
+  KeyModelMapping(['key2', 'key3'], PytorchModelHandlerTensor(<config2>)),
+  KeyModelMapping(['key4'], PytorchModelHandlerTensor(<config3>)),
+  KeyModelMapping(['key5', 'key6', 'key7'], PytorchModelHandlerTensor(<config4>)),
+]
+keyed_model_handler = KeyedModelHandler(mhs, max_models_per_worker_hint=2)
+```
+
+The previous example loads at most two models per SDK worker process at any given time. It unloads models that aren't
+currently being used. Runners that have multiple SDK worker processes on a given machine load at most
+`max_models_per_worker_hint*<num worker processes>` models onto the machine. Leave enough space for the models
+and any additional memory needs from other transforms. Because there might be a delay between when a model is offloaded and when the
+memory is released, it is recommended that you leave additional buffer.
+
+**Note**: Having many models but a small `max_models_per_worker_hint` can lead to _memory thrashing_, where
+a large amount of execution time is wasted swapping models in and out of memory. To reduce the likelihood and impact
+of memory thrashing, if you're using a distributed runner, insert a
+[GroupByKey](https://beam.apache.org/documentation/transforms/python/aggregation/groupbykey/) transform before your
+inference step. This step reduces thrashing by ensuring that elements with the same key and model are
+collocated on the same worker.
+
 For more information, see [`KeyedModelHandler`](https://beam.apache.org/releases/pydoc/current/apache_beam.ml.inference.base.html#apache_beam.ml.inference.base.KeyedModelHandler). 
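A minimal sketch of the GroupByKey mitigation described above; it is not part of the patch. It assumes `keyed_examples` is a list of `(key, example)` tuples and `keyed_model_handler` is built as in the preceding examples, and the re-flattening step is one possible way to restore the shape that `KeyedModelHandler` expects:

```python
import apache_beam as beam
from apache_beam.ml.inference.base import RunInference

with beam.Pipeline() as p:
    predictions = (
        p
        | beam.Create(keyed_examples)  # (key, example) tuples, assumed defined
        # Group by key so examples that share a key, and therefore a model,
        # are collocated on the same worker before inference.
        | beam.GroupByKey()
        # Restore the (key, example) pairs that KeyedModelHandler expects.
        | beam.FlatMap(lambda kv: [(kv[0], ex) for ex in kv[1]])
        | RunInference(keyed_model_handler))
```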
### Use the `PredictionResult` object From fbd147f83548f7586eccb0a364d7804318fbac86 Mon Sep 17 00:00:00 2001 From: Danny McCormick Date: Thu, 12 Oct 2023 09:27:14 -0400 Subject: [PATCH 07/13] Use latest released beam in notebook (#28801) --- examples/notebooks/beam-ml/per_key_models.ipynb | 7 +------ 1 file changed, 1 insertion(+), 6 deletions(-) diff --git a/examples/notebooks/beam-ml/per_key_models.ipynb b/examples/notebooks/beam-ml/per_key_models.ipynb index b529449555d0..a5dfa0d54e31 100644 --- a/examples/notebooks/beam-ml/per_key_models.ipynb +++ b/examples/notebooks/beam-ml/per_key_models.ipynb @@ -107,12 +107,7 @@ } ], "source": [ - "# Note that this notebook currently installs from Beam head since this feature hasn't been released yet.\n", - "# It will be released with version 2.51.0, at which point you can install with the following command:\n", - "# !pip install apache_beam[gcp]>=2.51.0 --quiet\n", - "!git clone https://github.com/apache/beam\n", - "!pip install -r beam/sdks/python/build-requirements.txt\n", - "!pip install -e ./beam/sdks/python[gcp]\n", + "!pip install apache_beam[gcp]>=2.51.0 --quiet\n", "!pip install torch --quiet\n", "!pip install transformers --quiet\n", "\n", From 7449b67a6d1019c82d852a69ae8cb093199335e6 Mon Sep 17 00:00:00 2001 From: Rebecca Szper <98840847+rszper@users.noreply.github.com> Date: Thu, 12 Oct 2023 06:54:39 -0700 Subject: [PATCH 08/13] Editing notebooks to prepare for DevSite import (#28949) --- .../beam-ml/automatic_model_refresh.ipynb | 8 ++++---- .../notebooks/beam-ml/mltransform_basic.ipynb | 6 ++---- .../notebooks/beam-ml/per_key_models.ipynb | 18 +++++++++--------- 3 files changed, 15 insertions(+), 17 deletions(-) diff --git a/examples/notebooks/beam-ml/automatic_model_refresh.ipynb b/examples/notebooks/beam-ml/automatic_model_refresh.ipynb index 9cbab0a14178..3bafa4f07887 100644 --- a/examples/notebooks/beam-ml/automatic_model_refresh.ipynb +++ b/examples/notebooks/beam-ml/automatic_model_refresh.ipynb @@ -248,7 +248,7 @@ " This example uses `TFModelHandlerTensor` as the model handler and the `resnet_101` model trained on [ImageNet](https://www.image-net.org/).\n", "\n", "\n", - "For DataflowRunner, the model needs to be stored remote location accessible by the Beam pipeline. So we will download `ResNet101` model and upload it to the GCS location.\n" + "For the Dataflow runner, you need to store the model in a remote location that the Apache Beam pipeline can access. For this example, download the `ResNet101` model, and upload it to the Google Cloud Storage bucket.\n" ], "metadata": { "id": "_AUNH_GJk_NE" @@ -392,7 +392,7 @@ "source": [ "2. To read and preprocess the images, use the `preprocess_image` function. This example uses `Cat-with-beanie.jpg` for all inferences.\n", "\n", - " **Note**: Image used for prediction is licensed in CC-BY. The creator is listed in the [LICENSE.txt](https://storage.googleapis.com/apache-beam-samples/image_captioning/LICENSE.txt) file." + " **Note**: The image used for prediction is licensed in CC-BY. The creator is listed in the [LICENSE.txt](https://storage.googleapis.com/apache-beam-samples/image_captioning/LICENSE.txt) file." ], "metadata": { "id": "8-sal2rFAxP2" @@ -424,7 +424,7 @@ "cell_type": "markdown", "source": [ "3. Pass the images to the RunInference `PTransform`. RunInference takes `model_handler` and `model_metadata_pcoll` as input parameters.\n", - " * `model_metadata_pcoll` is a side input `PCollection` to the RunInference `PTransform`. 
This side input is used to update the `model_uri` in the `model_handler` without needing to stop the Apache Beam pipeline\n", + " * `model_metadata_pcoll` is a side input `PCollection` to the RunInference `PTransform`. This side input updates the `model_uri` in the `model_handler` while the Apache Beam pipeline runs.\n", " * Use `WatchFilePattern` as side input to watch a `file_pattern` matching `.keras` files. In this case, the `file_pattern` is `'gs://BUCKET_NAME/dataflow/*keras'`.\n", "\n" ], @@ -483,7 +483,7 @@ "source": [ "### Watch for the model update\n", "\n", - "After the pipeline starts processing data and when you see output emitted from the RunInference `PTransform`, upload a `resnet152` model saved in `.keras` format to a Google Cloud Storage bucket location that matches the `file_pattern` you defined earlier.\n" + "After the pipeline starts processing data, when you see output emitted from the RunInference `PTransform`, upload a `resnet152` model saved in the `.keras` format to a Google Cloud Storage bucket location that matches the `file_pattern` you defined earlier.\n" ], "metadata": { "id": "wYp-mBHHjOjA" diff --git a/examples/notebooks/beam-ml/mltransform_basic.ipynb b/examples/notebooks/beam-ml/mltransform_basic.ipynb index 820bc3400b58..fd305bddb3ba 100644 --- a/examples/notebooks/beam-ml/mltransform_basic.ipynb +++ b/examples/notebooks/beam-ml/mltransform_basic.ipynb @@ -65,7 +65,7 @@ "id": "d3b81cf2-8603-42bd-995e-9e14631effd0" }, "source": [ - "This notebook demonstrates how to use `MLTransform` to preprocess your data for machine learning models. `MLTransform` is a `PTransform` that wraps multiple Apache Beam data processing transforms. As a result, `MLTransform` gives you the ability to preprocess different types of data in multiple ways with one transform.\n", + "This notebook demonstrates how to use `MLTransform` to preprocess your data for machine learning models. `MLTransform` is a `PTransform` that wraps multiple Apache Beam data processing transforms. With `MLTransform`, you can preprocess different types of data in multiple ways with one transform.\n", "\n", "This notebook uses data processing transforms defined in the [apache_beam/ml/transforms/tft](https://beam.apache.org/releases/pydoc/current/apache_beam.ml.transforms.tft.html) module." ] @@ -423,8 +423,6 @@ "source": [ "### Scale the data by using the z-score\n", "\n", - "Scale to the data using the z-score\n", - "\n", "Similar to `ScaleTo01`, use [ScaleToZScore](https://beam.apache.org/releases/pydoc/current/apache_beam.ml.transforms.tft.html#apache_beam.ml.transforms.tft.ScaleToZScore) to scale the values by using the [z-score]([z-score](https://www.tensorflow.org/tfx/transform/api_docs/python/tft/scale_to_z_score#:~:text=Scaling%20to%20z%2Dscore%20subtracts%20out%20the%20mean%20and%20divides%20by%20standard%20deviation.%20Note%20that%20the%20standard%20deviation%20computed%20here%20is%20based%20on%20the%20biased%20variance%20(0%20delta%20degrees%20of%20freedom)%2C%20as%20computed%20by%20analyzers.var.).\n" ], "metadata": { @@ -607,7 +605,7 @@ "\n", "The previous examples show how to preprocess data for model training. This example uses the same preprocessing steps on the inference data. By using the same steps on the inference data, you can maintain consistent results.\n", "\n", - "Preprocess the data going into the inference by using the same preprocessing steps used on the data prior to training. 
To do this with `MLTransform`, pass the artifact location from the previous transforms to the parameter `read_artifact_location`. `MLTransform` uses the values and artifacts produced in the previous steps. You don't need to provide the transforms, because they are saved with the artifacts in the artifact location.\n"
+    "Preprocess the data used by the inference by using the same preprocessing steps that you used on the data prior to training. When using `MLTransform`, pass the artifact location from the previous transforms to the parameter `read_artifact_location`. `MLTransform` uses the values and artifacts produced in the previous steps. You don't need to provide the transforms, because they are saved with the artifacts in the artifact location.\n"
   ],
   "metadata": {
    "id": "kcnQSwkA-eSA"
   }
diff --git a/examples/notebooks/beam-ml/per_key_models.ipynb b/examples/notebooks/beam-ml/per_key_models.ipynb
index a5dfa0d54e31..53845c0b3e19 100644
--- a/examples/notebooks/beam-ml/per_key_models.ipynb
+++ b/examples/notebooks/beam-ml/per_key_models.ipynb
@@ -70,7 +70,7 @@
     "\n",
     "In Apache Beam, the recommended way to run inference is to use the `RunInference` transform. By using a `KeyedModelHandler`, you can efficiently run inference with O(100s) of models without having to manage memory yourself.\n",
     "\n",
-    "This notebook demonstrates how to use a `KeyedModelHandler` to run inference in an Apache Beam pipeline with multiple different models on a per-key basis. This notebook uses pretrained pipelines from Hugging Face. Before continuing with this notebook, it is recommended that you walk through the [beginner RunInference notebook](https://colab.sandbox.google.com/github/apache/beam/blob/master/examples/notebooks/beam-ml/run_inference_pytorch_tensorflow_sklearn.ipynb)."
+    "This notebook demonstrates how to use a `KeyedModelHandler` to run inference in an Apache Beam pipeline with multiple different models on a per-key basis. This notebook uses pretrained pipelines from Hugging Face. Before continuing with this notebook, it is recommended that you walk through the [Use RunInference in Apache Beam](https://colab.sandbox.google.com/github/apache/beam/blob/master/examples/notebooks/beam-ml/run_inference_pytorch_tensorflow_sklearn.ipynb) notebook."
   ],
   "metadata": {
    "id": "ZAVOrrW2An1n"
   }
@@ -81,7 +81,7 @@
   "source": [
     "## Install dependencies\n",
     "\n",
-    "First, install both Apache Beam and the dependencies needed by Hugging Face."
+    "Install both Apache Beam and the dependencies needed by Hugging Face."
   ],
   "metadata": {
    "id": "_fNyheQoDgGt"
   }
@@ -144,7 +144,7 @@
     "\n",
     "A model handler is the Apache Beam method used to define the configuration needed to load and invoke models. Because this example uses two models, we define two model handlers, one for each model. Because both models are encapsulated within Hugging Face pipelines, we use the model handler `HuggingFacePipelineModelHandler`.\n",
     "\n",
-    "In this notebook, we load the models using Hugging Face and run them against an example. The models produce different outputs."
+    "For this example, load the models using Hugging Face, and then run them against an example. The models produce different outputs."
   ],
   "metadata": {
    "id": "uEqljVgCD7hx"
   }
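As context for the hunk above: building those two handlers could look roughly like the following sketch. `HuggingFacePipelineModelHandler` is the Beam class the notebook names; the task and model names here are illustrative stand-ins for the ones the notebook downloads.

```python
from apache_beam.ml.inference.huggingface_inference import (
    HuggingFacePipelineModelHandler)

# One handler per Hugging Face pipeline. Each pipeline bundles its own
# tokenizer and model, so no separate preprocessing step is required.
distilbert_mh = HuggingFacePipelineModelHandler(
    task='text-classification',
    model='distilbert-base-uncased-finetuned-sst-2-english')
roberta_mh = HuggingFacePipelineModelHandler(
    task='text-classification',
    model='roberta-large-mnli')
```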
@@ -350,7 +350,7 @@
   "source": [
     "## Define the examples\n",
     "\n",
-    "Next, define examples to input into the pipeline. The examples include their correct classifications."
+    "Define examples to input into the pipeline. The examples include the correct classifications."
   ],
   "metadata": {
    "id": "yd92MC7YEsTf"
   }
@@ -387,7 +387,7 @@
     "class FormatExamples(beam.DoFn):\n",
     "  \"\"\"\n",
     "  Map each example to a tuple of ('<model>-<answer>', 'example').\n",
-    "  We use these keys to map our elements to the correct models.\n",
+    "  Use these keys to map our elements to the correct models.\n",
     "  \"\"\"\n",
     "  def process(self, element: Tuple[str, str]) -> Iterable[Tuple[str, str]]:\n",
     "    yield (f'distilbert-{element[1]}', element[0])\n",
@@ -402,7 +402,7 @@
 {
   "cell_type": "markdown",
   "source": [
-    "Use the formatted keys to define a `KeyedModelHandler` that maps keys to the `ModelHandler` used for those keys. The `KeyedModelHandler` method lets you define an optional `max_models_per_worker_hint`, which limits the number of models that can be held in a single worker process at one time. If you're worried about your worker running out of memory, use this option. For more information about managing memory, see [Use a keyed ModelHandler](https://beam.apache.org/documentation/sdks/python-machine-learning/index.html#use-a-keyed-modelhandler)."
+    "Use the formatted keys to define a `KeyedModelHandler` that maps keys to the `ModelHandler` used for those keys. The `KeyedModelHandler` method lets you define an optional `max_models_per_worker_hint`, which limits the number of models that can be held in a single worker process at one time. If your worker might run out of memory, use this option. For more information about managing memory, see [Use a keyed ModelHandler](https://beam.apache.org/documentation/sdks/python-machine-learning/index.html#use-a-keyed-modelhandler)."
   ],
   "metadata": {
    "id": "IP65_5nNGIb8"
   }
@@ -428,9 +428,9 @@
   "source": [
     "## Postprocess the results\n",
     "\n",
-    "The `RunInference` transform returns a Tuple containing:\n",
+    "The `RunInference` transform returns a tuple that contains the following objects:\n",
     "* the original key\n",
-    "* a `PredictionResult` object containing the original example and the inference.\n",
+    "* a `PredictionResult` object containing the original example and the inference\n",
     "Use those outputs to extract the relevant data. Then, to compare each model's prediction, group this data by the original example."
   ],
   "metadata": {
@@ -505,7 +505,7 @@
   "source": [
     "## Run the pipeline\n",
     "\n",
-    "Put together all of the pieces to run a single Apache Beam pipeline."
+    "To run a single Apache Beam pipeline, combine the previous steps."
   ],
   "metadata": {
    "id": "-LrpmM2PGAkf"
   }

From 223dded769df48270df317868dc32144ec2fb353 Mon Sep 17 00:00:00 2001
From: Vitaly Terentyev
Date: Thu, 12 Oct 2023 18:57:35 +0400
Subject: [PATCH 09/13] Add Inference Python Benchmarks Dataflow workflow
 (#28943)

---
 ...m_Inference_Python_Benchmarks_Dataflow.yml | 144 ++++++++++++++++++
 ...rch_Imagenet_Classification_Resnet_152.txt |  34 +++++
 ...Classification_Resnet_152_Tesla_T4_GPU.txt |  36 +++++
 ...ch_Language_Modeling_Bert_Base_Uncased.txt |  34 +++++
 ...h_Language_Modeling_Bert_Large_Uncased.txt |  34 +++++
 ...torch_Vision_Classification_Resnet_101.txt |  34 +++++
 6 files changed, 316 insertions(+)
 create mode 100644 .github/workflows/beam_Inference_Python_Benchmarks_Dataflow.yml
 create mode 100644 .github/workflows/load-tests-job-configs/beam_Inference_Python_Benchmarks_Dataflow_Pytorch_Imagenet_Classification_Resnet_152.txt
 create mode 100644 .github/workflows/load-tests-job-configs/beam_Inference_Python_Benchmarks_Dataflow_Pytorch_Imagenet_Classification_Resnet_152_Tesla_T4_GPU.txt
 create mode 100644 .github/workflows/load-tests-job-configs/beam_Inference_Python_Benchmarks_Dataflow_Pytorch_Language_Modeling_Bert_Base_Uncased.txt
 create mode 100644 .github/workflows/load-tests-job-configs/beam_Inference_Python_Benchmarks_Dataflow_Pytorch_Language_Modeling_Bert_Large_Uncased.txt
 create mode 100644 .github/workflows/load-tests-job-configs/beam_Inference_Python_Benchmarks_Dataflow_Pytorch_Vision_Classification_Resnet_101.txt

diff --git a/.github/workflows/beam_Inference_Python_Benchmarks_Dataflow.yml b/.github/workflows/beam_Inference_Python_Benchmarks_Dataflow.yml
new file mode 100644
index 000000000000..117dc39a0fb7
--- /dev/null
+++ b/.github/workflows/beam_Inference_Python_Benchmarks_Dataflow.yml
@@ -0,0 +1,144 @@
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements. See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License. You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
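For readers following the notebook hunks above, the postprocessing they describe could look roughly like this sketch. The helper is illustrative rather than the notebook's exact code; it assumes keys of the form '<model>-<answer>' produced by `FormatExamples`, plus the `example` and `inference` fields of Beam's `PredictionResult`:

```python
import apache_beam as beam

def extract_result(keyed_result):
    # keyed_result is (key, PredictionResult). Split the '<model>-<answer>'
    # key back apart, then re-key by the original example so each model's
    # answer for the same input can be grouped and compared.
    key, result = keyed_result
    model, expected = key.split('-', 1)
    return (result.example, (model, result.inference, expected))

# ... | RunInference(keyed_model_handler)
#     | beam.Map(extract_result)
#     | beam.GroupByKey()
```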
+ +name: Inference Python Benchmarks Dataflow + +on: + issue_comment: + types: [created] + schedule: + - cron: '50 3 * * *' + workflow_dispatch: + +#Setting explicit permissions for the action to avoid the default permissions which are `write-all` in case of pull_request_target event +permissions: + actions: write + pull-requests: read + checks: read + contents: read + deployments: read + id-token: none + issues: read + discussions: read + packages: read + pages: read + repository-projects: read + security-events: read + statuses: read + +# This allows a subsequently queued workflow run to interrupt previous runs +concurrency: + group: '${{ github.workflow }} @ ${{ github.event.issue.number || github.sha || github.head_ref || github.ref }}-${{ github.event.schedule || github.event.comment.body || github.event.sender.login }}' + cancel-in-progress: true + +env: + GRADLE_ENTERPRISE_ACCESS_KEY: ${{ secrets.GE_ACCESS_TOKEN }} + GRADLE_ENTERPRISE_CACHE_USERNAME: ${{ secrets.GE_CACHE_USERNAME }} + GRADLE_ENTERPRISE_CACHE_PASSWORD: ${{ secrets.GE_CACHE_PASSWORD }} + +jobs: + beam_Inference_Python_Benchmarks_Dataflow: + if: | + github.event_name == 'workflow_dispatch' || + github.event_name == 'schedule' || + github.event.comment.body == 'Run Inference Benchmarks' + runs-on: [self-hosted, ubuntu-20.04, main] + timeout-minutes: 900 + name: ${{ matrix.job_name }} (${{ matrix.job_phrase }}) + strategy: + matrix: + job_name: ["beam_Inference_Python_Benchmarks_Dataflow"] + job_phrase: ["Run Inference Benchmarks"] + steps: + - uses: actions/checkout@v3 + - name: Setup repository + uses: ./.github/actions/setup-action + with: + comment_phrase: ${{ matrix.job_phrase }} + github_token: ${{ secrets.GITHUB_TOKEN }} + github_job: ${{ matrix.job_name }} (${{ matrix.job_phrase }}) + - name: Setup Python environment + uses: ./.github/actions/setup-environment-action + with: + python-version: '3.8' + - name: Prepare test arguments + uses: ./.github/actions/test-arguments-action + with: + test-type: load + test-language: python + argument-file-paths: | + ${{ github.workspace }}/.github/workflows/load-tests-job-configs/beam_Inference_Python_Benchmarks_Dataflow_Pytorch_Vision_Classification_Resnet_101.txt + ${{ github.workspace }}/.github/workflows/load-tests-job-configs/beam_Inference_Python_Benchmarks_Dataflow_Pytorch_Imagenet_Classification_Resnet_152.txt + ${{ github.workspace }}/.github/workflows/load-tests-job-configs/beam_Inference_Python_Benchmarks_Dataflow_Pytorch_Language_Modeling_Bert_Base_Uncased.txt + ${{ github.workspace }}/.github/workflows/load-tests-job-configs/beam_Inference_Python_Benchmarks_Dataflow_Pytorch_Language_Modeling_Bert_Large_Uncased.txt + ${{ github.workspace }}/.github/workflows/load-tests-job-configs/beam_Inference_Python_Benchmarks_Dataflow_Pytorch_Imagenet_Classification_Resnet_152_Tesla_T4_GPU.txt + # The env variables are created and populated in the test-arguments-action as "_test_arguments_" + - name: get current time + run: echo "NOW_UTC=$(date '+%m%d%H%M%S' --utc)" >> $GITHUB_ENV + - name: run Pytorch Vision Classification with Resnet 101 + uses: ./.github/actions/gradle-command-self-hosted-action + timeout-minutes: 180 + with: + gradle-command: :sdks:python:apache_beam:testing:load_tests:run + arguments: | + -PloadTest.mainClass=apache_beam.testing.benchmarks.inference.pytorch_image_classification_benchmarks \ + -Prunner=DataflowRunner \ + -PpythonVersion=3.8 \ + -PloadTest.requirementsTxtFile=apache_beam/ml/inference/torch_tests_requirements.txt \ + '-PloadTest.args=${{ 
env.beam_Inference_Python_Benchmarks_Dataflow_test_arguments_1 }} --job_name=benchmark-tests-pytorch-imagenet-python-101-${{env.NOW_UTC}} --output=gs://temp-storage-for-end-to-end-tests/torch/result_resnet101-${{env.NOW_UTC}}.txt' \
+      - name: run Pytorch Imagenet Classification with Resnet 152
+        uses: ./.github/actions/gradle-command-self-hosted-action
+        timeout-minutes: 180
+        with:
+          gradle-command: :sdks:python:apache_beam:testing:load_tests:run
+          arguments: |
+            -PloadTest.mainClass=apache_beam.testing.benchmarks.inference.pytorch_image_classification_benchmarks \
+            -Prunner=DataflowRunner \
+            -PpythonVersion=3.8 \
+            -PloadTest.requirementsTxtFile=apache_beam/ml/inference/torch_tests_requirements.txt \
+            '-PloadTest.args=${{ env.beam_Inference_Python_Benchmarks_Dataflow_test_arguments_2 }} --job_name=benchmark-tests-pytorch-imagenet-python-152-${{env.NOW_UTC}} --output=gs://temp-storage-for-end-to-end-tests/torch/result_resnet152-${{env.NOW_UTC}}.txt' \
+      - name: run Pytorch Language Modeling using Hugging Face bert-base-uncased model
+        uses: ./.github/actions/gradle-command-self-hosted-action
+        timeout-minutes: 180
+        with:
+          gradle-command: :sdks:python:apache_beam:testing:load_tests:run
+          arguments: |
+            -PloadTest.mainClass=apache_beam.testing.benchmarks.inference.pytorch_language_modeling_benchmarks \
+            -Prunner=DataflowRunner \
+            -PpythonVersion=3.8 \
+            -PloadTest.requirementsTxtFile=apache_beam/ml/inference/torch_tests_requirements.txt \
+            '-PloadTest.args=${{ env.beam_Inference_Python_Benchmarks_Dataflow_test_arguments_3 }} --job_name=benchmark-tests-pytorch-language-modeling-bert-base-uncased-${{env.NOW_UTC}} --output=gs://temp-storage-for-end-to-end-tests/torch/result_bert_base_uncased-${{env.NOW_UTC}}.txt' \
+      - name: run Pytorch Language Modeling using Hugging Face bert-large-uncased model
+        uses: ./.github/actions/gradle-command-self-hosted-action
+        timeout-minutes: 180
+        with:
+          gradle-command: :sdks:python:apache_beam:testing:load_tests:run
+          arguments: |
+            -PloadTest.mainClass=apache_beam.testing.benchmarks.inference.pytorch_language_modeling_benchmarks \
+            -Prunner=DataflowRunner \
+            -PpythonVersion=3.8 \
+            -PloadTest.requirementsTxtFile=apache_beam/ml/inference/torch_tests_requirements.txt \
+            '-PloadTest.args=${{ env.beam_Inference_Python_Benchmarks_Dataflow_test_arguments_4 }} --job_name=benchmark-tests-pytorch-language-modeling-bert-large-uncased-${{env.NOW_UTC}} --output=gs://temp-storage-for-end-to-end-tests/torch/result_bert_large_uncased-${{env.NOW_UTC}}.txt' \
+      - name: run Pytorch Imagenet Classification with Resnet 152 with Tesla T4 GPU
+        uses: ./.github/actions/gradle-command-self-hosted-action
+        timeout-minutes: 180
+        with:
+          gradle-command: :sdks:python:apache_beam:testing:load_tests:run
+          arguments: |
+            -PloadTest.mainClass=apache_beam.testing.benchmarks.inference.pytorch_image_classification_benchmarks \
+            -Prunner=DataflowRunner \
+            -PpythonVersion=3.8 \
+            -PloadTest.requirementsTxtFile=apache_beam/ml/inference/torch_tests_requirements.txt \
+            '-PloadTest.args=${{ env.beam_Inference_Python_Benchmarks_Dataflow_test_arguments_5 }} --job_name=benchmark-tests-pytorch-imagenet-python-gpu-${{env.NOW_UTC}} --output=gs://temp-storage-for-end-to-end-tests/torch/result_resnet152_gpu-${{env.NOW_UTC}}.txt'
\ No newline at end of file
diff --git a/.github/workflows/load-tests-job-configs/beam_Inference_Python_Benchmarks_Dataflow_Pytorch_Imagenet_Classification_Resnet_152.txt
b/.github/workflows/load-tests-job-configs/beam_Inference_Python_Benchmarks_Dataflow_Pytorch_Imagenet_Classification_Resnet_152.txt new file mode 100644 index 000000000000..c65317b49573 --- /dev/null +++ b/.github/workflows/load-tests-job-configs/beam_Inference_Python_Benchmarks_Dataflow_Pytorch_Imagenet_Classification_Resnet_152.txt @@ -0,0 +1,34 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +--region=us-central1 +--machine_type=n1-standard-2 +--num_workers=75 +--disk_size_gb=50 +--autoscaling_algorithm=NONE +--staging_location=gs://temp-storage-for-perf-tests/loadtests +--temp_location=gs://temp-storage-for-perf-tests/loadtests +--requirements_file=apache_beam/ml/inference/torch_tests_requirements.txt +--publish_to_big_query=true +--metrics_dataset=beam_run_inference +--metrics_table=torch_inference_imagenet_results_resnet152 +--input_options={} +--influx_measurement=torch_inference_imagenet_resnet152 +--pretrained_model_name=resnet152 +--device=CPU +--input_file=gs://apache-beam-ml/testing/inputs/openimage_50k_benchmark.txt +--model_state_dict_path=gs://apache-beam-ml/models/torchvision.models.resnet152.pth +--runner=DataflowRunner \ No newline at end of file diff --git a/.github/workflows/load-tests-job-configs/beam_Inference_Python_Benchmarks_Dataflow_Pytorch_Imagenet_Classification_Resnet_152_Tesla_T4_GPU.txt b/.github/workflows/load-tests-job-configs/beam_Inference_Python_Benchmarks_Dataflow_Pytorch_Imagenet_Classification_Resnet_152_Tesla_T4_GPU.txt new file mode 100644 index 000000000000..c1b7e273c6e8 --- /dev/null +++ b/.github/workflows/load-tests-job-configs/beam_Inference_Python_Benchmarks_Dataflow_Pytorch_Imagenet_Classification_Resnet_152_Tesla_T4_GPU.txt @@ -0,0 +1,36 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +--region=us-central1 +--machine_type=n1-standard-2 +--num_workers=75 +--disk_size_gb=50 +--autoscaling_algorithm=NONE +--staging_location=gs://temp-storage-for-perf-tests/loadtests +--temp_location=gs://temp-storage-for-perf-tests/loadtests +--requirements_file=apache_beam/ml/inference/torch_tests_requirements.txt +--publish_to_big_query=true +--metrics_dataset=beam_run_inference +--metrics_table=torch_inference_imagenet_results_resnet152_tesla_t4 +--input_options={} +--influx_measurement=torch_inference_imagenet_resnet152_tesla_t4 +--pretrained_model_name=resnet152 +--device=GPU +--experiments=worker_accelerator=type:nvidia-tesla-t4;count:1;install-nvidia-driver +--sdk_container_image=us.gcr.io/apache-beam-testing/python-postcommit-it/tensor_rt:latest +--input_file=gs://apache-beam-ml/testing/inputs/openimage_50k_benchmark.txt +--model_state_dict_path=gs://apache-beam-ml/models/torchvision.models.resnet152.pth +--runner=DataflowRunner \ No newline at end of file diff --git a/.github/workflows/load-tests-job-configs/beam_Inference_Python_Benchmarks_Dataflow_Pytorch_Language_Modeling_Bert_Base_Uncased.txt b/.github/workflows/load-tests-job-configs/beam_Inference_Python_Benchmarks_Dataflow_Pytorch_Language_Modeling_Bert_Base_Uncased.txt new file mode 100644 index 000000000000..66aca5fdbcd7 --- /dev/null +++ b/.github/workflows/load-tests-job-configs/beam_Inference_Python_Benchmarks_Dataflow_Pytorch_Language_Modeling_Bert_Base_Uncased.txt @@ -0,0 +1,34 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +--region=us-central1 +--machine_type=n1-standard-2 +--num_workers=250 +--disk_size_gb=50 +--autoscaling_algorithm=NONE +--staging_location=gs://temp-storage-for-perf-tests/loadtests +--temp_location=gs://temp-storage-for-perf-tests/loadtests +--requirements_file=apache_beam/ml/inference/torch_tests_requirements.txt +--publish_to_big_query=true +--metrics_dataset=beam_run_inference +--metrics_table=torch_language_modeling_bert_base_uncased +--input_options={} +--influx_measurement=torch_language_modeling_bert_base_uncased +--device=CPU +--input_file=gs://apache-beam-ml/testing/inputs/sentences_50k.txt +--bert_tokenizer=bert-base-uncased +--model_state_dict_path=gs://apache-beam-ml/models/huggingface.BertForMaskedLM.bert-base-uncased.pth +--runner=DataflowRunner \ No newline at end of file diff --git a/.github/workflows/load-tests-job-configs/beam_Inference_Python_Benchmarks_Dataflow_Pytorch_Language_Modeling_Bert_Large_Uncased.txt b/.github/workflows/load-tests-job-configs/beam_Inference_Python_Benchmarks_Dataflow_Pytorch_Language_Modeling_Bert_Large_Uncased.txt new file mode 100644 index 000000000000..d6406271685b --- /dev/null +++ b/.github/workflows/load-tests-job-configs/beam_Inference_Python_Benchmarks_Dataflow_Pytorch_Language_Modeling_Bert_Large_Uncased.txt @@ -0,0 +1,34 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +--region=us-central1 +--machine_type=n1-standard-2 +--num_workers=250 +--disk_size_gb=50 +--autoscaling_algorithm=NONE +--staging_location=gs://temp-storage-for-perf-tests/loadtests +--temp_location=gs://temp-storage-for-perf-tests/loadtests +--requirements_file=apache_beam/ml/inference/torch_tests_requirements.txt +--publish_to_big_query=true +--metrics_dataset=beam_run_inference +--metrics_table=torch_language_modeling_bert_large_uncased +--input_options={} +--influx_measurement=torch_language_modeling_bert_large_uncased +--device=CPU +--input_file=gs://apache-beam-ml/testing/inputs/sentences_50k.txt +--bert_tokenizer=bert-large-uncased +--model_state_dict_path=gs://apache-beam-ml/models/huggingface.BertForMaskedLM.bert-large-uncased.pth +--runner=DataflowRunner \ No newline at end of file diff --git a/.github/workflows/load-tests-job-configs/beam_Inference_Python_Benchmarks_Dataflow_Pytorch_Vision_Classification_Resnet_101.txt b/.github/workflows/load-tests-job-configs/beam_Inference_Python_Benchmarks_Dataflow_Pytorch_Vision_Classification_Resnet_101.txt new file mode 100644 index 000000000000..5a0d25043909 --- /dev/null +++ b/.github/workflows/load-tests-job-configs/beam_Inference_Python_Benchmarks_Dataflow_Pytorch_Vision_Classification_Resnet_101.txt @@ -0,0 +1,34 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. 
See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +--region=us-central1 +--machine_type=n1-standard-2 +--num_workers=75 +--disk_size_gb=50 +--autoscaling_algorithm=NONE +--staging_location=gs://temp-storage-for-perf-tests/loadtests +--temp_location=gs://temp-storage-for-perf-tests/loadtests +--requirements_file=apache_beam/ml/inference/torch_tests_requirements.txt +--publish_to_big_query=true +--metrics_dataset=beam_run_inference +--metrics_table=torch_inference_imagenet_results_resnet101 +--input_options={} +--influx_measurement=torch_inference_imagenet_resnet101 +--pretrained_model_name=resnet101 +--device=CPU +--input_file=gs://apache-beam-ml/testing/inputs/openimage_50k_benchmark.txt +--model_state_dict_path=gs://apache-beam-ml/models/torchvision.models.resnet101.pth +--runner=DataflowRunner \ No newline at end of file From 63b99fb9cbe676df1aa37c37f1f14e26f1433946 Mon Sep 17 00:00:00 2001 From: Jack McCluskey <34928439+jrmccluskey@users.noreply.github.com> Date: Thu, 12 Oct 2023 14:43:38 -0400 Subject: [PATCH 10/13] Remove govulncheck for Go Test workflow (#28966) --- .github/workflows/go_tests.yml | 5 ----- 1 file changed, 5 deletions(-) diff --git a/.github/workflows/go_tests.yml b/.github/workflows/go_tests.yml index 66c57d219a83..02947eff5ca0 100644 --- a/.github/workflows/go_tests.yml +++ b/.github/workflows/go_tests.yml @@ -68,8 +68,3 @@ jobs: go install "honnef.co/go/tools/cmd/staticcheck@2023.1.3" cd sdks/go/pkg/beam $(go env GOPATH)/bin/staticcheck ./... - - uses: golang/govulncheck-action@v1.0.1 - with: - work-dir: ./sdks - go-package: ./... - go-version-input: 1.21 From 3e066e3524a7f8b04945e67750867d9021903cd9 Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Thu, 12 Oct 2023 14:56:00 -0400 Subject: [PATCH 11/13] Bump com.gradle.common-custom-user-data-gradle-plugin (#28762) Bumps com.gradle.common-custom-user-data-gradle-plugin from 1.10 to 1.11.3. --- updated-dependencies: - dependency-name: com.gradle.common-custom-user-data-gradle-plugin dependency-type: direct:production update-type: version-update:semver-minor ... 
Signed-off-by: dependabot[bot]
Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com>
---
 settings.gradle.kts | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/settings.gradle.kts b/settings.gradle.kts
index d1069ec2d352..b71ce249c155 100644
--- a/settings.gradle.kts
+++ b/settings.gradle.kts
@@ -25,7 +25,7 @@ pluginManagement {
 plugins {
   id("com.gradle.enterprise") version "3.13.2"
-  id("com.gradle.common-custom-user-data-gradle-plugin") version "1.10"
+  id("com.gradle.common-custom-user-data-gradle-plugin") version "1.11.3"
 }

From a94d29ffd21710eac3e88be9325346171368e219 Mon Sep 17 00:00:00 2001
From: Anand Inguva <34158215+AnandInguva@users.noreply.github.com>
Date: Thu, 12 Oct 2023 19:14:39 +0000
Subject: [PATCH 12/13] Add pyproject.toml (#28385)

* Add pyproject.toml

Update numpy bounds

* Use subprocess to run grpcio since it is not imported in pyproject.toml

Update setup.py

Remove ImportError from gen_protos.py

Update subprocess run and raise RuntimeError if proto generation fails

Print output of setup.py

Fix linting issues

* Remove build-requirements.txt and use build to build the sdist

Modify buildPython task

* Use wheels to run tox precommit tests

Upgrade pip in Dockerfile

Move _ELEMENTS to shared file. tests are not importable by each other

Add missing element

Remove shared_test_variables

Remove installing wheel in a test suite

Retry run_tox.sh with no installPkg flag

Remove natural language test. codepath is covered in the postCommits.

Add back tox exit code

* Remove cython tests. default tests will run with Cython extensions

Fix toxTask name

Add no-extra test suite to precommit and remove GH duplicate ubuntu test

Skip failing non-cython test

Fix tox test name

* Force type cast inputs to list

* Update stager to use build.
If it fails, use legacy setup to build sdist Fix mypy issue * Remove cython env and build-requirements for tox.ini --- .github/workflows/beam_PreCommit_Python.yml | 1 + .../beam_PreCommit_Python_Coverage.yml | 2 + .../beam_PreCommit_Python_Dataframes.yml | 1 + .../beam_PreCommit_Python_Examples.yml | 1 + .../beam_PreCommit_Python_Runners.yml | 1 + .../beam_PreCommit_Python_Transforms.yml | 1 + .github/workflows/build_release_candidate.yml | 2 +- .github/workflows/build_wheels.yml | 17 +-- .github/workflows/dask_runner_tests.yml | 8 +- .github/workflows/python_dependency_tests.yml | 5 +- .github/workflows/python_tests.yml | 16 +-- .github/workflows/run_perf_alert_tool.yml | 3 - .github/workflows/typescript_tests.yml | 7 +- .../jenkins/job_PreCommit_Python.groovy | 1 + .../job_PreCommit_Python_Coverage.groovy | 3 + .../job_PreCommit_Python_Dataframes.groovy | 3 +- .../job_PreCommit_Python_Examples.groovy | 3 +- .../job_PreCommit_Python_Runners.groovy | 3 +- .../job_PreCommit_Python_Transforms.groovy | 3 +- .../beam/gradle/BeamModulePlugin.groovy | 40 ++++--- .../main/scripts/build_release_candidate.sh | 2 +- .../apache_beam/coders/slow_coders_test.py | 3 + .../apache_beam/examples/inference/README.md | 1 - .../apache_beam/examples/kafkataxi/README.md | 4 +- .../io/azure/integration_test/Dockerfile | 2 +- .../apache_beam/io/gcp/bigquery_test.py | 39 ++++++- .../io/hdfs_integration_test/Dockerfile | 2 +- .../ml/gcp/naturallanguageml_test.py | 19 ---- sdks/python/apache_beam/runners/common.py | 4 +- .../apache_beam/runners/portability/stager.py | 28 +++-- sdks/python/build-requirements.txt | 28 ----- sdks/python/build.gradle | 7 +- sdks/python/container/Dockerfile | 2 +- .../base_image_requirements_manual.txt | 1 + sdks/python/gen_protos.py | 105 ++++++------------ sdks/python/pyproject.toml | 36 ++++++ sdks/python/scripts/run_pytest.sh | 4 +- sdks/python/scripts/run_tox.sh | 19 +++- sdks/python/setup.py | 59 ++++++---- sdks/python/test-suites/tox/common.gradle | 12 +- .../python/test-suites/tox/py310/build.gradle | 2 - .../python/test-suites/tox/py311/build.gradle | 2 - sdks/python/test-suites/tox/py38/build.gradle | 2 - sdks/python/test-suites/tox/py39/build.gradle | 3 - sdks/python/tox.ini | 32 +----- .../ml/multi-language-inference.md | 2 +- .../sdks/python-pipeline-dependencies.md | 13 ++- 47 files changed, 268 insertions(+), 286 deletions(-) delete mode 100644 sdks/python/build-requirements.txt create mode 100644 sdks/python/pyproject.toml diff --git a/.github/workflows/beam_PreCommit_Python.yml b/.github/workflows/beam_PreCommit_Python.yml index c891a79cefd0..35e7b937068d 100644 --- a/.github/workflows/beam_PreCommit_Python.yml +++ b/.github/workflows/beam_PreCommit_Python.yml @@ -97,6 +97,7 @@ jobs: arguments: | -Pposargs="--ignore=apache_beam/dataframe/ --ignore=apache_beam/examples/ --ignore=apache_beam/runners/ --ignore=apache_beam/transforms/" \ -PpythonVersion=${{ matrix.python_version }} \ + -PuseWheelDistribution - name: Archive code coverage results uses: actions/upload-artifact@v3 with: diff --git a/.github/workflows/beam_PreCommit_Python_Coverage.yml b/.github/workflows/beam_PreCommit_Python_Coverage.yml index 33be9644d34a..4b274d643e07 100644 --- a/.github/workflows/beam_PreCommit_Python_Coverage.yml +++ b/.github/workflows/beam_PreCommit_Python_Coverage.yml @@ -86,6 +86,8 @@ jobs: uses: ./.github/actions/gradle-command-self-hosted-action with: gradle-command: :sdks:python:test-suites:tox:py38:preCommitPyCoverage + arguments: | + -PuseWheelDistribution - name: Archive code 
coverage results uses: actions/upload-artifact@v3 with: diff --git a/.github/workflows/beam_PreCommit_Python_Dataframes.yml b/.github/workflows/beam_PreCommit_Python_Dataframes.yml index 2862d7d5936c..f03716d06795 100644 --- a/.github/workflows/beam_PreCommit_Python_Dataframes.yml +++ b/.github/workflows/beam_PreCommit_Python_Dataframes.yml @@ -97,6 +97,7 @@ jobs: arguments: | -Pposargs=apache_beam/dataframe/ \ -PpythonVersion=${{ matrix.python_version }} \ + -PuseWheelDistribution - name: Archive code coverage results uses: actions/upload-artifact@v3 with: diff --git a/.github/workflows/beam_PreCommit_Python_Examples.yml b/.github/workflows/beam_PreCommit_Python_Examples.yml index 7f980885180a..d629ee09b725 100644 --- a/.github/workflows/beam_PreCommit_Python_Examples.yml +++ b/.github/workflows/beam_PreCommit_Python_Examples.yml @@ -97,6 +97,7 @@ jobs: arguments: | -Pposargs=apache_beam/examples/ \ -PpythonVersion=${{ matrix.python_version }} \ + -PuseWheelDistribution - name: Archive code coverage results uses: actions/upload-artifact@v3 with: diff --git a/.github/workflows/beam_PreCommit_Python_Runners.yml b/.github/workflows/beam_PreCommit_Python_Runners.yml index b0c5ab4fa34a..f823112e23dd 100644 --- a/.github/workflows/beam_PreCommit_Python_Runners.yml +++ b/.github/workflows/beam_PreCommit_Python_Runners.yml @@ -97,6 +97,7 @@ jobs: arguments: | -Pposargs=apache_beam/runners/ \ -PpythonVersion=${{ matrix.python_version }} \ + -PuseWheelDistribution - name: Archive code coverage results uses: actions/upload-artifact@v3 with: diff --git a/.github/workflows/beam_PreCommit_Python_Transforms.yml b/.github/workflows/beam_PreCommit_Python_Transforms.yml index 88ac59c9de96..7374af7f38c9 100644 --- a/.github/workflows/beam_PreCommit_Python_Transforms.yml +++ b/.github/workflows/beam_PreCommit_Python_Transforms.yml @@ -97,6 +97,7 @@ jobs: arguments: | -Pposargs=apache_beam/transforms/ \ -PpythonVersion=${{ matrix.python_version }} \ + -PuseWheelDistribution - name: Archive code coverage results uses: actions/upload-artifact@v3 with: diff --git a/.github/workflows/build_release_candidate.yml b/.github/workflows/build_release_candidate.yml index 3956551431c6..12f1537dac18 100644 --- a/.github/workflows/build_release_candidate.yml +++ b/.github/workflows/build_release_candidate.yml @@ -314,7 +314,7 @@ jobs: pip install -U pip pip install tox # TODO(https://github.com/apache/beam/issues/20209): Don't hardcode py version in this file. 
-          pip install -r build-requirements.txt && tox -e py38-docs
+          tox -e py38-docs
           rm -rf target/docs/_build/.doctrees
     - name: Build Typescript Docs
       working-directory: beam/sdks/typescript
diff --git a/.github/workflows/build_wheels.yml b/.github/workflows/build_wheels.yml
index 94248be0c008..f4ccf368bacb 100644
--- a/.github/workflows/build_wheels.yml
+++ b/.github/workflows/build_wheels.yml
@@ -92,11 +92,6 @@ jobs:
         uses: actions/setup-python@v4
         with:
           python-version: 3.8
-      - name: Get build dependencies
-        working-directory: ./sdks/python
-        run: python -m pip install -r build-requirements.txt
-      - name: Install wheels
-        run: python -m pip install wheel
       - name: Get tag
         id: get_tag
         run: |
@@ -117,15 +112,15 @@
           echo "RELEASE_VERSION=$RELEASE_VERSION" >> $GITHUB_OUTPUT
       - name: Build source
         working-directory: ./sdks/python
-        run: python setup.py sdist --formats=zip
+        run: pip install -U build && python -m build --sdist
       - name: Add checksums
         working-directory: ./sdks/python/dist
         run: |
-          file=$(ls | grep .zip | head -n 1)
+          file=$(ls | grep .tar.gz | head -n 1)
           sha512sum $file > ${file}.sha512
       - name: Unzip source
         working-directory: ./sdks/python
-        run: unzip dist/$(ls dist | grep .zip | head -n 1)
+        run: tar -xzvf dist/$(ls dist | grep .tar.gz | head -n 1)
       - name: Rename source directory
         working-directory: ./sdks/python
         run: mv $(ls | grep apache-beam) apache-beam-source
@@ -155,17 +150,17 @@
       - name: Build RC source
         if: steps.is_rc.outputs.is_rc == 1
         working-directory: ./sdks/python
-        run: python setup.py sdist --formats=zip
+        run: pip install -U build && python -m build --sdist
       - name: Add RC checksums
         if: steps.is_rc.outputs.is_rc == 1
         working-directory: ./sdks/python/dist
         run: |
-          file=$(ls | grep .zip | head -n 1)
+          file=$(ls | grep .tar.gz | head -n 1)
           sha512sum $file > ${file}.sha512
       - name: Unzip RC source
         if: steps.is_rc.outputs.is_rc == 1
         working-directory: ./sdks/python
-        run: unzip dist/$(ls dist | grep .zip | head -n 1)
+        run: tar -xzvf dist/$(ls dist | grep .tar.gz | head -n 1)
       - name: Rename RC source directory
         if: steps.is_rc.outputs.is_rc == 1
         working-directory: ./sdks/python
diff --git a/.github/workflows/dask_runner_tests.yml b/.github/workflows/dask_runner_tests.yml
index 423a304db825..35c320086992 100644
--- a/.github/workflows/dask_runner_tests.yml
+++ b/.github/workflows/dask_runner_tests.yml
@@ -44,12 +44,9 @@ jobs:
         uses: actions/setup-python@v4
         with:
           python-version: 3.8
-      - name: Get build dependencies
-        working-directory: ./sdks/python
-        run: pip install pip setuptools --upgrade && pip install -r build-requirements.txt
       - name: Build source
         working-directory: ./sdks/python
-        run: python setup.py sdist
+        run: pip install -U build && python -m build --sdist
       - name: Rename source file
         working-directory: ./sdks/python/dist
         run: mv $(ls | grep "apache-beam.*tar\.gz") apache-beam-source.tar.gz
@@ -78,9 +75,6 @@
         uses: actions/setup-python@v4
         with:
           python-version: ${{ matrix.params.py_ver }}
-      - name: Get build dependencies
-        working-directory: ./sdks/python
-        run: pip install -r build-requirements.txt
       - name: Install tox
         run: pip install tox
       - name: Install SDK with dask
diff --git a/.github/workflows/python_dependency_tests.yml b/.github/workflows/python_dependency_tests.yml
index 6fd865bda754..166899df90cb 100644
--- a/.github/workflows/python_dependency_tests.yml
+++ b/.github/workflows/python_dependency_tests.yml
@@ -7,7 +7,7 @@ on:
     branches: ['master', 'release-*']
     tags: 'v*'
   # paths where Beam Python's dependencies are configured.
- paths: ['sdks/python/setup.py', 'sdks/python/build-requirements.txt', 'sdks/python/container/base_image_requirements_manual.txt'] + paths: ['sdks/python/setup.py', 'sdks/python/pyproject.toml', 'sdks/python/container/base_image_requirements_manual.txt'] # This allows a subsequently queued workflow run to interrupt previous runs concurrency: @@ -38,9 +38,6 @@ jobs: uses: actions/setup-python@v4 with: python-version: ${{ matrix.params.py_ver }} - - name: Install Build dependencies - working-directory: ./sdks/python - run: pip install -r build-requirements.txt - name: Install base_image_requirements.txt working-directory: ./sdks/python run: pip install --no-deps -r container/${{ matrix.params.py_env }}/base_image_requirements.txt diff --git a/.github/workflows/python_tests.yml b/.github/workflows/python_tests.yml index 57ec895c2431..0309329e84e1 100644 --- a/.github/workflows/python_tests.yml +++ b/.github/workflows/python_tests.yml @@ -78,12 +78,9 @@ jobs: uses: actions/setup-python@v4 with: python-version: 3.8 - - name: Get build dependencies - working-directory: ./sdks/python - run: pip install pip setuptools --upgrade && pip install -r build-requirements.txt - name: Build source working-directory: ./sdks/python - run: python setup.py sdist + run: pip install -U build && python -m build --sdist - name: Rename source file working-directory: ./sdks/python/dist run: mv $(ls | grep "apache-beam.*tar\.gz") apache-beam-source.tar.gz @@ -99,7 +96,7 @@ jobs: strategy: fail-fast: false matrix: - os: [ubuntu-latest, macos-latest, windows-latest] + os: [macos-latest, windows-latest] params: [ {"py_ver": "3.8", "tox_env": "py38"}, {"py_ver": "3.9", "tox_env": "py39"}, @@ -113,9 +110,6 @@ jobs: uses: actions/setup-python@v4 with: python-version: ${{ matrix.params.py_ver }} - - name: Get build dependencies - working-directory: ./sdks/python - run: pip install -r build-requirements.txt --use-pep517 - name: Install tox run: pip install tox - name: Run tests basic unix @@ -148,9 +142,6 @@ jobs: uses: actions/setup-python@v4 with: python-version: ${{ matrix.python }} - - name: Get build dependencies - working-directory: ./sdks/python - run: pip install -r build-requirements.txt - name: Install requirements working-directory: ./sdks/python run: pip install setuptools --upgrade && pip install -e . 
@@ -192,9 +183,6 @@ jobs: service_account_key: ${{ secrets.GCP_SA_KEY }} project_id: ${{ secrets.GCP_PROJECT_ID }} export_default_credentials: true - - name: Get build dependencies - working-directory: ./sdks/python - run: pip install -r build-requirements.txt - name: Install requirements working-directory: ./sdks/python run: pip install setuptools --upgrade && pip install -e ".[gcp]" diff --git a/.github/workflows/run_perf_alert_tool.yml b/.github/workflows/run_perf_alert_tool.yml index 1bd8d525c2fb..c61665c1bc7c 100644 --- a/.github/workflows/run_perf_alert_tool.yml +++ b/.github/workflows/run_perf_alert_tool.yml @@ -46,9 +46,6 @@ jobs: with: service_account_key: ${{ secrets.GCP_SA_KEY }} export_default_credentials: true - - name: Get Apache Beam Build dependencies - working-directory: ./sdks/python - run: pip install pip setuptools --upgrade && pip install -r build-requirements.txt - name: Install Apache Beam working-directory: ./sdks/python run: pip install -e .[gcp,test] diff --git a/.github/workflows/typescript_tests.yml b/.github/workflows/typescript_tests.yml index a4aa14c42efd..edbe8399e7d8 100644 --- a/.github/workflows/typescript_tests.yml +++ b/.github/workflows/typescript_tests.yml @@ -89,10 +89,8 @@ jobs: - name: Setup Beam Python working-directory: ./sdks/python run: | - pip install pip setuptools --upgrade - pip install -r build-requirements.txt pip install 'pandas>=1.0,<1.5' - python setup.py develop + pip install -e . - run: npm ci working-directory: ./sdks/typescript - run: npm run build @@ -146,10 +144,7 @@ jobs: - name: Setup Beam Python working-directory: ./sdks/python run: | - pip install pip setuptools --upgrade - pip install -r build-requirements.txt pip install 'pandas>=1.0,<1.5' - python setup.py develop pip install -e ".[gcp]" - name: Authenticate on GCP uses: google-github-actions/setup-gcloud@v0 diff --git a/.test-infra/jenkins/job_PreCommit_Python.groovy b/.test-infra/jenkins/job_PreCommit_Python.groovy index 0e439d788877..9c9740e3c97e 100644 --- a/.test-infra/jenkins/job_PreCommit_Python.groovy +++ b/.test-infra/jenkins/job_PreCommit_Python.groovy @@ -29,6 +29,7 @@ PrecommitJobBuilder builder = new PrecommitJobBuilder( '^release/.*$', ], gradleSwitches: [ + '-PuseWheelDistribution', '-Pposargs=\"--ignore=apache_beam/dataframe/ --ignore=apache_beam/examples/ --ignore=apache_beam/runners/ --ignore=apache_beam/transforms/\"' // All these tests are covered by different jobs. 
], numBuildsToRetain: 40 diff --git a/.test-infra/jenkins/job_PreCommit_Python_Coverage.groovy b/.test-infra/jenkins/job_PreCommit_Python_Coverage.groovy index c0cb48cf6231..43a204fd7cfc 100644 --- a/.test-infra/jenkins/job_PreCommit_Python_Coverage.groovy +++ b/.test-infra/jenkins/job_PreCommit_Python_Coverage.groovy @@ -22,6 +22,9 @@ PrecommitJobBuilder builder = new PrecommitJobBuilder( scope: this, nameBase: 'Python_Coverage', gradleTask: ':sdks:python:test-suites:tox:py38:preCommitPyCoverage', + gradleSwitches: [ + '-PuseWheelDistribution' + ], timeoutMins: 180, triggerPathPatterns: [ '^model/.*$', diff --git a/.test-infra/jenkins/job_PreCommit_Python_Dataframes.groovy b/.test-infra/jenkins/job_PreCommit_Python_Dataframes.groovy index e2914e9bdb8e..dea034f613a5 100644 --- a/.test-infra/jenkins/job_PreCommit_Python_Dataframes.groovy +++ b/.test-infra/jenkins/job_PreCommit_Python_Dataframes.groovy @@ -23,7 +23,8 @@ PrecommitJobBuilder builder = new PrecommitJobBuilder( nameBase: 'Python_Dataframes', gradleTask: ':pythonPreCommit', gradleSwitches: [ - '-Pposargs=apache_beam/dataframe/' + '-Pposargs=apache_beam/dataframe/', + '-PuseWheelDistribution' ], timeoutMins: 180, triggerPathPatterns: [ diff --git a/.test-infra/jenkins/job_PreCommit_Python_Examples.groovy b/.test-infra/jenkins/job_PreCommit_Python_Examples.groovy index f4ef9f51d7fb..3dd7bf6f6f47 100644 --- a/.test-infra/jenkins/job_PreCommit_Python_Examples.groovy +++ b/.test-infra/jenkins/job_PreCommit_Python_Examples.groovy @@ -23,7 +23,8 @@ PrecommitJobBuilder builder = new PrecommitJobBuilder( nameBase: 'Python_Examples', gradleTask: ':pythonPreCommit', gradleSwitches: [ - '-Pposargs=apache_beam/examples/' + '-Pposargs=apache_beam/examples/', + '-PuseWheelDistribution' ], timeoutMins: 180, triggerPathPatterns: [ diff --git a/.test-infra/jenkins/job_PreCommit_Python_Runners.groovy b/.test-infra/jenkins/job_PreCommit_Python_Runners.groovy index e80dba6cf5cd..4ae1d283b7a9 100644 --- a/.test-infra/jenkins/job_PreCommit_Python_Runners.groovy +++ b/.test-infra/jenkins/job_PreCommit_Python_Runners.groovy @@ -23,7 +23,8 @@ PrecommitJobBuilder builder = new PrecommitJobBuilder( nameBase: 'Python_Runners', gradleTask: ':pythonPreCommit', gradleSwitches: [ - '-Pposargs=apache_beam/runners/' + '-Pposargs=apache_beam/runners/', + '-PuseWheelDistribution' ], timeoutMins: 180, triggerPathPatterns: [ diff --git a/.test-infra/jenkins/job_PreCommit_Python_Transforms.groovy b/.test-infra/jenkins/job_PreCommit_Python_Transforms.groovy index dd16d48b1731..ccd3f08b78ab 100644 --- a/.test-infra/jenkins/job_PreCommit_Python_Transforms.groovy +++ b/.test-infra/jenkins/job_PreCommit_Python_Transforms.groovy @@ -23,7 +23,8 @@ PrecommitJobBuilder builder = new PrecommitJobBuilder( nameBase: 'Python_Transforms', gradleTask: ':pythonPreCommit', gradleSwitches: [ - '-Pposargs=apache_beam/transforms/' + '-Pposargs=apache_beam/transforms/', + '-PuseWheelDistribution' ], timeoutMins: 180, triggerPathPatterns: [ diff --git a/buildSrc/src/main/groovy/org/apache/beam/gradle/BeamModulePlugin.groovy b/buildSrc/src/main/groovy/org/apache/beam/gradle/BeamModulePlugin.groovy index c32717aae725..6fa5ff0ee5f3 100644 --- a/buildSrc/src/main/groovy/org/apache/beam/gradle/BeamModulePlugin.groovy +++ b/buildSrc/src/main/groovy/org/apache/beam/gradle/BeamModulePlugin.groovy @@ -2998,7 +2998,7 @@ class BeamModulePlugin implements Plugin { executable 'sh' args '-c', ". 
${project.ext.envdir}/bin/activate && " + "pip install --pre --retries 10 --upgrade pip && " + - "pip install --pre --retries 10 --upgrade tox -r ${project.rootDir}/sdks/python/build-requirements.txt" + "pip install --pre --retries 10 --upgrade tox" } } // Gradle will delete outputs whenever it thinks they are stale. Putting a @@ -3081,30 +3081,40 @@ class BeamModulePlugin implements Plugin { } return argList.join(' ') } - project.ext.toxTask = { name, tox_env, posargs='' -> project.tasks.register(name) { dependsOn setupVirtualenv dependsOn ':sdks:python:sdist' - - doLast { - // Python source directory is also tox execution workspace, We want - // to isolate them per tox suite to avoid conflict when running - // multiple tox suites in parallel. - project.copy { from project.pythonSdkDeps; into copiedSrcRoot } - - def copiedPyRoot = "${copiedSrcRoot}/sdks/python" - def distTarBall = "${pythonRootDir}/build/apache-beam.tar.gz" - project.exec { - executable 'sh' - args '-c', ". ${project.ext.envdir}/bin/activate && cd ${copiedPyRoot} && scripts/run_tox.sh $tox_env $distTarBall '$posargs'" + if (project.hasProperty('useWheelDistribution')) { + def pythonVersionNumber = project.ext.pythonVersion.replace('.', '') + dependsOn ":sdks:python:bdistPy${pythonVersionNumber}linux" + doLast { + project.copy { from project.pythonSdkDeps; into copiedSrcRoot } + def copiedPyRoot = "${copiedSrcRoot}/sdks/python" + def collection = project.fileTree(project.project(':sdks:python').buildDir){ + include "**/apache_beam-*cp${pythonVersionNumber}*manylinux*.whl" + } + String packageFilename = collection.singleFile.toString() + project.exec { + executable 'sh' + args '-c', ". ${project.ext.envdir}/bin/activate && cd ${copiedPyRoot} && scripts/run_tox.sh $tox_env ${packageFilename} '$posargs' " + } + } + } else { + // tox task will run in editable mode, which is configured in the tox.ini file. + doLast { + project.copy { from project.pythonSdkDeps; into copiedSrcRoot } + def copiedPyRoot = "${copiedSrcRoot}/sdks/python" + project.exec { + executable 'sh' + args '-c', ". ${project.ext.envdir}/bin/activate && cd ${copiedPyRoot} && scripts/run_tox.sh $tox_env '$posargs'" + } } } inputs.files project.pythonSdkDeps outputs.files project.fileTree(dir: "${pythonRootDir}/target/.tox/${tox_env}/log/") } } - // Run single or a set of integration tests with provided test options and pipeline options. project.ext.enablePythonPerformanceTest = { diff --git a/release/src/main/scripts/build_release_candidate.sh b/release/src/main/scripts/build_release_candidate.sh index 057a38833adc..d0e6310f50aa 100755 --- a/release/src/main/scripts/build_release_candidate.sh +++ b/release/src/main/scripts/build_release_candidate.sh @@ -346,7 +346,7 @@ if [[ $confirmation = "y" ]]; then cd ${BEAM_ROOT_DIR} RELEASE_COMMIT=$(git rev-list -n 1 "tags/${RC_TAG}") # TODO(https://github.com/apache/beam/issues/20209): Don't hardcode py version in this file. 
-  cd sdks/python && pip install -r build-requirements.txt && tox -e py38-docs
+  cd sdks/python && tox -e py38-docs
   GENERATED_PYDOC=~/${LOCAL_WEBSITE_UPDATE_DIR}/${LOCAL_PYTHON_DOC}/${BEAM_ROOT_DIR}/sdks/python/target/docs/_build
   rm -rf ${GENERATED_PYDOC}/.doctrees
diff --git a/sdks/python/apache_beam/coders/slow_coders_test.py b/sdks/python/apache_beam/coders/slow_coders_test.py
index fe1c707a62e5..7915116a19a3 100644
--- a/sdks/python/apache_beam/coders/slow_coders_test.py
+++ b/sdks/python/apache_beam/coders/slow_coders_test.py
@@ -25,6 +25,9 @@
 from apache_beam.coders.coders_test_common import *


+@unittest.skip(
+    'Remove non-cython tests. '
+    'https://github.com/apache/beam/issues/28307')
 class SlowCoders(unittest.TestCase):
   def test_using_slow_impl(self):
     try:
diff --git a/sdks/python/apache_beam/examples/inference/README.md b/sdks/python/apache_beam/examples/inference/README.md
index 19262dead586..cd92d9c127ee 100644
--- a/sdks/python/apache_beam/examples/inference/README.md
+++ b/sdks/python/apache_beam/examples/inference/README.md
@@ -29,7 +29,6 @@ Some examples are also used in [our benchmarks](http://s.apache.org/beam-communi
 You must have the latest (possibly unreleased) `apache-beam` or greater installed from the Beam repo in order to run these pipelines, because some examples rely on the latest features that are actively in development. To install Beam, run the following from the `sdks/python` directory:
 ```
-pip install -r build-requirements.txt
 pip install -e .[gcp]
 ```
diff --git a/sdks/python/apache_beam/examples/kafkataxi/README.md b/sdks/python/apache_beam/examples/kafkataxi/README.md
index c4e808cad8b4..72a8d8f85c03 100644
--- a/sdks/python/apache_beam/examples/kafkataxi/README.md
+++ b/sdks/python/apache_beam/examples/kafkataxi/README.md
@@ -157,9 +157,9 @@ Install Beam and dependencies and build a Beam distribution.

 ```sh
 cd beam/sdks/python
-pip install -r build-requirements.txt
 pip install -e '.[gcp]'
-python setup.py sdist
+pip install -q build
+python -m build --sdist
 ```

 Run the Beam pipeline. You can either use the default Kafka topic name or specify
diff --git a/sdks/python/apache_beam/io/azure/integration_test/Dockerfile b/sdks/python/apache_beam/io/azure/integration_test/Dockerfile
index e9ac396b8e17..257fa72cb668 100644
--- a/sdks/python/apache_beam/io/azure/integration_test/Dockerfile
+++ b/sdks/python/apache_beam/io/azure/integration_test/Dockerfile
@@ -32,7 +32,7 @@ COPY sdks/python /app/sdks/python
 COPY model /app/model

 # This step should look like setupVirtualenv minus virtualenv creation.
-RUN pip install --no-cache-dir tox -r sdks/python/build-requirements.txt
+RUN pip install --no-cache-dir tox

 # Add Azurite's self-signed cert to the global CA cert store.
COPY cert.pem /usr/local/share/ca-certificates/azurite.crt diff --git a/sdks/python/apache_beam/io/gcp/bigquery_test.py b/sdks/python/apache_beam/io/gcp/bigquery_test.py index 7e9c1e634748..95b6c2a5fa60 100644 --- a/sdks/python/apache_beam/io/gcp/bigquery_test.py +++ b/sdks/python/apache_beam/io/gcp/bigquery_test.py @@ -49,14 +49,12 @@ from apache_beam.io.gcp.bigquery import TableRowJsonCoder from apache_beam.io.gcp.bigquery import WriteToBigQuery from apache_beam.io.gcp.bigquery import _StreamToBigQuery -from apache_beam.io.gcp.bigquery_file_loads_test import _ELEMENTS from apache_beam.io.gcp.bigquery_read_internal import _JsonToDictCoder from apache_beam.io.gcp.bigquery_read_internal import bigquery_export_destination_uri from apache_beam.io.gcp.bigquery_tools import JSON_COMPLIANCE_ERROR from apache_beam.io.gcp.bigquery_tools import BigQueryWrapper from apache_beam.io.gcp.bigquery_tools import RetryStrategy from apache_beam.io.gcp.internal.clients import bigquery -from apache_beam.io.gcp.internal.clients.bigquery import bigquery_v2_client from apache_beam.io.gcp.pubsub import ReadFromPubSub from apache_beam.io.gcp.tests import utils from apache_beam.io.gcp.tests.bigquery_matcher import BigqueryFullResultMatcher @@ -82,6 +80,7 @@ # pylint: disable=wrong-import-order, wrong-import-position try: + from apache_beam.io.gcp.internal.clients.bigquery import bigquery_v2_client from apitools.base.py.exceptions import HttpError from google.cloud import bigquery as gcp_bigquery from google.api_core import exceptions @@ -93,6 +92,42 @@ _LOGGER = logging.getLogger(__name__) +_ELEMENTS = [ + { + 'name': 'beam', 'language': 'py' + }, + { + 'name': 'beam', 'language': 'java' + }, + { + 'name': 'beam', 'language': 'go' + }, + { + 'name': 'flink', 'language': 'java' + }, + { + 'name': 'flink', 'language': 'scala' + }, + { + 'name': 'spark', 'language': 'scala' + }, + { + 'name': 'spark', 'language': 'py' + }, + { + 'name': 'spark', 'language': 'scala' + }, + { + 'name': 'beam', 'foundation': 'apache' + }, + { + 'name': 'flink', 'foundation': 'apache' + }, + { + 'name': 'spark', 'foundation': 'apache' + }, +] + def _load_or_default(filename): try: diff --git a/sdks/python/apache_beam/io/hdfs_integration_test/Dockerfile b/sdks/python/apache_beam/io/hdfs_integration_test/Dockerfile index 487d5c3487ab..ab7940563394 100644 --- a/sdks/python/apache_beam/io/hdfs_integration_test/Dockerfile +++ b/sdks/python/apache_beam/io/hdfs_integration_test/Dockerfile @@ -30,7 +30,7 @@ COPY sdks/python /app/sdks/python COPY model /app/model # This step should look like setupVirtualenv minus virtualenv creation. -RUN pip install --no-cache-dir tox -r sdks/python/build-requirements.txt +RUN pip install --no-cache-dir tox # Run wordcount, and write results to HDFS. CMD cd sdks/python && tox -e hdfs_integration_test diff --git a/sdks/python/apache_beam/ml/gcp/naturallanguageml_test.py b/sdks/python/apache_beam/ml/gcp/naturallanguageml_test.py index bad7443d0d94..891726cb2688 100644 --- a/sdks/python/apache_beam/ml/gcp/naturallanguageml_test.py +++ b/sdks/python/apache_beam/ml/gcp/naturallanguageml_test.py @@ -20,11 +20,7 @@ import unittest -import mock - -import apache_beam as beam from apache_beam.metrics import MetricsFilter -from apache_beam.testing.test_pipeline import TestPipeline # Protect against environments where Google Cloud Natural Language client # is not available. 
@@ -60,21 +56,6 @@ def test_document_source(self): self.assertFalse('content' in dict_) self.assertTrue('gcs_content_uri' in dict_) - def test_annotate_test_called(self): - with mock.patch('apache_beam.ml.gcp.naturallanguageml._AnnotateTextFn' - '._get_api_client'): - p = TestPipeline() - features = [ - naturallanguageml.language_v1.AnnotateTextRequest.Features( - extract_syntax=True) - ] - _ = ( - p | beam.Create([naturallanguageml.Document('Hello, world!')]) - | naturallanguageml.AnnotateText(features)) - result = p.run() - result.wait_until_finish() - self.assertCounterEqual(result, 'api_calls', 1) - if __name__ == '__main__': unittest.main() diff --git a/sdks/python/apache_beam/runners/common.py b/sdks/python/apache_beam/runners/common.py index 99cd26cc4098..ed0dc2d9a0c1 100644 --- a/sdks/python/apache_beam/runners/common.py +++ b/sdks/python/apache_beam/runners/common.py @@ -765,6 +765,7 @@ def __init__(self, # Try to prepare all the arguments that can just be filled in # without any additional work. in the process function. # Also cache all the placeholders needed in the process function. + input_args = list(input_args) ( self.placeholders_for_process, self.args_for_process, @@ -1437,7 +1438,8 @@ def process(self, windowed_value): return [] def _maybe_sample_exception( - self, exn: BaseException, windowed_value: WindowedValue) -> None: + self, exn: BaseException, + windowed_value: Optional[WindowedValue]) -> None: if self.execution_context is None: return diff --git a/sdks/python/apache_beam/runners/portability/stager.py b/sdks/python/apache_beam/runners/portability/stager.py index ace573de0a62..4afe5eaaa370 100644 --- a/sdks/python/apache_beam/runners/portability/stager.py +++ b/sdks/python/apache_beam/runners/portability/stager.py @@ -49,6 +49,7 @@ import glob import hashlib +import importlib.util import logging import os import shutil @@ -771,13 +772,26 @@ def _build_setup_package(setup_file, # type: str try: os.chdir(os.path.dirname(setup_file)) if build_setup_args is None: - build_setup_args = [ - Stager._get_python_executable(), - os.path.basename(setup_file), - 'sdist', - '--dist-dir', - temp_dir - ] + # if build is installed in the user env, use it to + # build the sdist else fallback to legacy setup.py sdist call. + if importlib.util.find_spec('build'): + build_setup_args = [ + Stager._get_python_executable(), + '-m', + 'build', + '--sdist', + '--outdir', + temp_dir, + os.path.dirname(setup_file), + ] + else: + build_setup_args = [ + Stager._get_python_executable(), + os.path.basename(setup_file), + 'sdist', + '--dist-dir', + temp_dir + ] _LOGGER.info('Executing command: %s', build_setup_args) processes.check_output(build_setup_args) output_files = glob.glob(os.path.join(temp_dir, '*.tar.gz')) diff --git a/sdks/python/build-requirements.txt b/sdks/python/build-requirements.txt deleted file mode 100644 index 4fe47079d8d0..000000000000 --- a/sdks/python/build-requirements.txt +++ /dev/null @@ -1,28 +0,0 @@ -# -# Licensed to the Apache Software Foundation (ASF) under one or more -# contributor license agreements. See the NOTICE file distributed with -# this work for additional information regarding copyright ownership. -# The ASF licenses this file to You under the Apache License, Version 2.0 -# (the "License"); you may not use this file except in compliance with -# the License. 
You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# - -# TODO(https://github.com/apache/beam/issues/20051): Consider PEP-517/PEP-518 instead of this file. - -setuptools -wheel>=0.36.0 -grpcio-tools==1.53.0 -mypy-protobuf==3.5.0 -# Avoid https://github.com/pypa/virtualenv/issues/2006 -distlib==0.3.7 - -# Numpy headers -numpy>=1.14.3,<1.26 diff --git a/sdks/python/build.gradle b/sdks/python/build.gradle index 762bed268d63..7795e77e3963 100644 --- a/sdks/python/build.gradle +++ b/sdks/python/build.gradle @@ -30,7 +30,8 @@ def buildPython = tasks.register("buildPython") { logger.info('Building Python Dependencies') exec { executable 'sh' - args '-c', ". ${envdir}/bin/activate && python setup.py build --build-base ${buildDir}" + // args '-c', ". ${envdir}/bin/activate && python setup.py build --build-base ${buildDir}" + args '-c', ". ${envdir}/bin/activate && pip install -e ." } } } @@ -46,7 +47,7 @@ def sdist = tasks.register("sdist") { // Build artifact exec { executable 'sh' - args '-c', ". ${envdir}/bin/activate && python setup.py -q sdist --formats zip,gztar --dist-dir ${buildDir}" + args '-c', ". ${envdir}/bin/activate && pip install -U build && python -m build --sdist --outdir=${buildDir}" } def collection = fileTree(buildDir){ include "**/*${project.sdk_version}*.tar.gz" exclude 'srcs/**'} @@ -96,7 +97,6 @@ platform_identifiers_map.each { platform, idsuffix -> exec { environment CIBW_BUILD: "cp${pyversion}-${idsuffix}" environment CIBW_ENVIRONMENT: "SETUPTOOLS_USE_DISTUTILS=stdlib" - environment CIBW_BEFORE_BUILD: "pip install cython==0.29.36 numpy --config-settings=setup-args='-Dallow-noblas=true' && pip install --upgrade setuptools" // note: sync cibuildwheel version with GitHub Action // .github/workflow/build_wheel.yml:build_wheels "Install cibuildwheel" step executable 'sh' @@ -110,6 +110,7 @@ platform_identifiers_map.each { platform, idsuffix -> } } + /*************************************************************************************************/ // Non-testing builds and analysis tasks diff --git a/sdks/python/container/Dockerfile b/sdks/python/container/Dockerfile index 73d83343e033..a49933ee6604 100644 --- a/sdks/python/container/Dockerfile +++ b/sdks/python/container/Dockerfile @@ -45,7 +45,7 @@ RUN \ && \ rm -rf /var/lib/apt/lists/* && \ - pip install --upgrade setuptools && \ + pip install --upgrade pip setuptools wheel && \ # Install required packages for Beam Python SDK and common dependencies used by users. # use --no-deps to ensure the list includes all transitive dependencies. diff --git a/sdks/python/container/base_image_requirements_manual.txt b/sdks/python/container/base_image_requirements_manual.txt index e952b2126604..f2f3ea44b44c 100644 --- a/sdks/python/container/base_image_requirements_manual.txt +++ b/sdks/python/container/base_image_requirements_manual.txt @@ -43,3 +43,4 @@ nose==1.3.7 # For Dataflow internal testing. TODO: remove this. python-snappy;python_version<"3.11" # Optimizes execution of some Beam codepaths. scipy scikit-learn +build>=1.0,<2 # tool to build sdist from setup.py in stager. 
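The stager change earlier and the Gradle `sdist` task above converge on the same command. A hedged shell equivalent of the stager's two paths, where `$TMP_DIR` and the package path are placeholders:

```bash
# Preferred path, taken when the 'build' package is importable (the base
# container image now ships build>=1.0 for exactly this reason):
python -m build --sdist --outdir "$TMP_DIR" /path/to/user/package

# Legacy fallback, kept for environments without 'build':
cd /path/to/user/package && python setup.py sdist --dist-dir "$TMP_DIR"
```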
\ No newline at end of file diff --git a/sdks/python/gen_protos.py b/sdks/python/gen_protos.py index 94d80c8d263b..2b488af0afb5 100644 --- a/sdks/python/gen_protos.py +++ b/sdks/python/gen_protos.py @@ -18,7 +18,7 @@ """ Generates Python proto modules and grpc stubs for Beam protos. """ - +import argparse import contextlib import glob import inspect @@ -27,9 +27,7 @@ import platform import re import shutil -import subprocess import sys -import time from collections import defaultdict from importlib import import_module @@ -60,7 +58,7 @@ NO_PROMISES_NOTICE = """ \"\"\" For internal use only; no backwards-compatibility guarantees. -Automatically generated when running setup.py sdist or build[_py]. +Automatically generated when running python -m build. \"\"\" """ @@ -321,43 +319,6 @@ def find_by_ext(root_dir, ext): if file.endswith(ext): yield clean_path(os.path.join(root, file)) - -def ensure_grpcio_exists(): - try: - from grpc_tools import protoc # pylint: disable=unused-import - except ImportError: - return _install_grpcio_tools() - - -def _install_grpcio_tools(): - """ - Though wheels are available for grpcio-tools, setup_requires uses - easy_install which doesn't understand them. This means that it is - compiled from scratch (which is expensive as it compiles the full - protoc compiler). Instead, we attempt to install a wheel in a temporary - directory and add it to the path as needed. - See https://github.com/pypa/setuptools/issues/377 - """ - install_path = os.path.join(PYTHON_SDK_ROOT, '.eggs', 'grpcio-wheels') - logging.warning('Installing grpcio-tools into %s', install_path) - start = time.time() - subprocess.check_call([ - sys.executable, - '-m', - 'pip', - 'install', - '--target', - install_path, - '--upgrade', - '-r', - os.path.join(PYTHON_SDK_ROOT, 'build-requirements.txt') - ]) - logging.warning( - 'Installing grpcio-tools took %0.2f seconds.', time.time() - start) - - return install_path - - def build_relative_import(root_path, import_path, start_file_path): tail_path = import_path.replace('.', os.path.sep) source_path = os.path.join(root_path, tail_path) @@ -511,33 +472,31 @@ def generate_proto_files(force=False): if not os.path.exists(PYTHON_OUTPUT_PATH): os.mkdir(PYTHON_OUTPUT_PATH) - grpcio_install_loc = ensure_grpcio_exists() protoc_gen_mypy = _find_protoc_gen_mypy() - with PythonPath(grpcio_install_loc): - from grpc_tools import protoc - builtin_protos = pkg_resources.resource_filename('grpc_tools', '_proto') - args = ( - [sys.executable] + # expecting to be called from command line - ['--proto_path=%s' % builtin_protos] + - ['--proto_path=%s' % d - for d in proto_dirs] + ['--python_out=%s' % PYTHON_OUTPUT_PATH] + - ['--plugin=protoc-gen-mypy=%s' % protoc_gen_mypy] + - # new version of mypy-protobuf converts None to zero default value - # and remove Optional from the param type annotation. This causes - # some mypy errors. So to mitigate and fall back to old behavior, - # use `relax_strict_optional_primitives` flag. more at - # https://github.com/nipunn1313/mypy-protobuf/tree/main#relax_strict_optional_primitives # pylint:disable=line-too-long - ['--mypy_out=relax_strict_optional_primitives:%s' % PYTHON_OUTPUT_PATH - ] + - # TODO(robertwb): Remove the prefix once it's the default. - ['--grpc_python_out=grpc_2_0:%s' % PYTHON_OUTPUT_PATH] + proto_files) - - LOG.info('Regenerating Python proto definitions (%s).' 
% regenerate_reason) - ret_code = protoc.main(args) - if ret_code: - raise RuntimeError( - 'Protoc returned non-zero status (see logs for details): ' - '%s' % ret_code) + from grpc_tools import protoc + builtin_protos = pkg_resources.resource_filename('grpc_tools', '_proto') + args = ( + [sys.executable] + # expecting to be called from command line + ['--proto_path=%s' % builtin_protos] + + ['--proto_path=%s' % d + for d in proto_dirs] + ['--python_out=%s' % PYTHON_OUTPUT_PATH] + + ['--plugin=protoc-gen-mypy=%s' % protoc_gen_mypy] + + # new version of mypy-protobuf converts None to zero default value + # and remove Optional from the param type annotation. This causes + # some mypy errors. So to mitigate and fall back to old behavior, + # use `relax_strict_optional_primitives` flag. more at + # https://github.com/nipunn1313/mypy-protobuf/tree/main#relax_strict_optional_primitives # pylint:disable=line-too-long + ['--mypy_out=relax_strict_optional_primitives:%s' % PYTHON_OUTPUT_PATH + ] + + # TODO(robertwb): Remove the prefix once it's the default. + ['--grpc_python_out=grpc_2_0:%s' % PYTHON_OUTPUT_PATH] + proto_files) + + LOG.info('Regenerating Python proto definitions (%s).' % regenerate_reason) + ret_code = protoc.main(args) + if ret_code: + raise RuntimeError( + 'Protoc returned non-zero status (see logs for details): ' + '%s' % ret_code) # copy resource files for path in MODEL_RESOURCES: @@ -548,7 +507,7 @@ def generate_proto_files(force=False): # force relative import paths for proto files compiled_import_re = re.compile('^from (.*) import (.*)$') for file_path in find_by_ext(PYTHON_OUTPUT_PATH, - ('_pb2.py', '_pb2_grpc.py', '_pb2.pyi')): + ('_pb2.py', '_pb2_grpc.py', '_pb2.pyi')): proto_packages.add(os.path.dirname(file_path)) lines = [] with open(file_path, encoding='utf-8') as f: @@ -566,12 +525,14 @@ def generate_proto_files(force=False): f.writelines(lines) generate_init_files_lite(PYTHON_OUTPUT_PATH) - with PythonPath(grpcio_install_loc): - for proto_package in proto_packages: - generate_urn_files(proto_package, PYTHON_OUTPUT_PATH) + for proto_package in proto_packages: + generate_urn_files(proto_package, PYTHON_OUTPUT_PATH) generate_init_files_full(PYTHON_OUTPUT_PATH) if __name__ == '__main__': - generate_proto_files(force=True) + parser = argparse.ArgumentParser() + parser.add_argument('--no-force', dest='force', action='store_false') + args = parser.parse_args() + generate_proto_files(force=args.force) diff --git a/sdks/python/pyproject.toml b/sdks/python/pyproject.toml new file mode 100644 index 000000000000..d185c45f6191 --- /dev/null +++ b/sdks/python/pyproject.toml @@ -0,0 +1,36 @@ +# +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright ownership. +# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
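With the grpcio-tools bootstrap removed, `gen_protos.py` assumes its tools are already importable (they are declared in the `pyproject.toml` build requirements introduced below) and exposes a small CLI. Plausible invocations, assuming the repository root as the working directory:

```bash
# Regenerate protos unconditionally (the default when no flag is passed):
python sdks/python/gen_protos.py

# Regenerate only when outputs are stale; this is what setup.py now calls:
python sdks/python/gen_protos.py --no-force
```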
+# + +# since we rely on setuptools and according to https://peps.python.org/pep-0518/#build-system-table +# this is the minimum requirements for the build system to execute. +[build-system] +requires = [ + "setuptools", + "wheel>=0.36.0", + "grpcio-tools==1.53.0", + "mypy-protobuf==3.5.0", + # Avoid https://github.com/pypa/virtualenv/issues/2006 + "distlib==0.3.7", + # Numpy headers + "numpy>=1.14.3,<1.25", # Update setup.py as well. + # having cython here will create wheels that are platform dependent. + "cython==0.29.36", +] + + +# legacy installation is needed to generate `apache_beam.portability.api` package. +build-backend = "setuptools.build_meta" \ No newline at end of file diff --git a/sdks/python/scripts/run_pytest.sh b/sdks/python/scripts/run_pytest.sh index 01f2318164c4..ad35b48972b6 100755 --- a/sdks/python/scripts/run_pytest.sh +++ b/sdks/python/scripts/run_pytest.sh @@ -42,10 +42,10 @@ echo "posargs: $posargs" # Run with pytest-xdist and without. pytest -o junit_suite_name=${envname} \ - --junitxml=pytest_${envname}.xml -m 'not no_xdist' -n 6 ${pytest_args} --pyargs ${posargs} + --junitxml=pytest_${envname}.xml -m 'not no_xdist' -n 6 --import-mode=importlib ${pytest_args} --pyargs ${posargs} status1=$? pytest -o junit_suite_name=${envname}_no_xdist \ - --junitxml=pytest_${envname}_no_xdist.xml -m 'no_xdist' ${pytest_args} --pyargs ${posargs} + --junitxml=pytest_${envname}_no_xdist.xml -m 'no_xdist' --import-mode=importlib ${pytest_args} --pyargs ${posargs} status2=$? # Exit with error if no tests were run in either suite (status code 5). diff --git a/sdks/python/scripts/run_tox.sh b/sdks/python/scripts/run_tox.sh index ebbacf5494ea..ac60f26b32ba 100755 --- a/sdks/python/scripts/run_tox.sh +++ b/sdks/python/scripts/run_tox.sh @@ -53,12 +53,21 @@ if [[ "$JENKINS_HOME" != "" ]]; then export PY_COLORS=1 fi -if [[ ! -z $2 ]]; then +# Determine if the second argument is SDK_LOCATION or posargs +if [[ -f "$1" ]]; then # Check if the argument corresponds to a file SDK_LOCATION="$1" - shift; - tox -c tox.ini run --recreate -e "$TOX_ENVIRONMENT" --installpkg "$SDK_LOCATION" -- "$@" -else - tox -c tox.ini run --recreate -e "$TOX_ENVIRONMENT" + shift +fi + +# If SDK_LOCATION is identified and there are still arguments left, those are posargs. +if [[ ! -z "$SDK_LOCATION" ]]; then + if [[ $# -gt 0 ]]; then # There are posargs + tox -c tox.ini run --recreate -e "$TOX_ENVIRONMENT" --installpkg "$SDK_LOCATION" -- "$@" + else + tox -c tox.ini run --recreate -e "$TOX_ENVIRONMENT" --installpkg "$SDK_LOCATION" + fi +else # No SDK_LOCATION; all arguments are posargs + tox -c tox.ini run --recreate -e "$TOX_ENVIRONMENT" -- "$@" fi exit_code=$? diff --git a/sdks/python/setup.py b/sdks/python/setup.py index ca585ccf7167..4a05544526fc 100644 --- a/sdks/python/setup.py +++ b/sdks/python/setup.py @@ -18,6 +18,7 @@ """Apache Beam SDK for Python setup file.""" import os +import subprocess import sys import warnings # Pylint and isort disagree here. @@ -62,7 +63,6 @@ def get_project_path(self): return os.path.join(project_path, to_filename(ei_cmd.egg_name)) def run(self): - import subprocess args = ['mypy', self.get_project_path()] result = subprocess.call(args) if result != 0: @@ -155,12 +155,18 @@ def cythonize(*args, **kwargs): # We must generate protos after setup_requires are installed. 
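Given the `run_tox.sh` argument handling above, where a file-existence test distinguishes a built package from pytest posargs, invocations look roughly like this (paths and the test selector are illustrative):

```bash
# env + built package + posargs: the tarball exists on disk, so it is
# passed to tox via --installpkg and the remainder is forwarded to pytest.
scripts/run_tox.sh py38 ../../build/apache-beam.tar.gz '-k CoderTest'

# env + posargs only: no file is detected, so tox builds the SDK itself
# and every remaining argument becomes a posarg.
scripts/run_tox.sh py38 '-k CoderTest'
```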
def generate_protos_first(): try: - # pylint: disable=wrong-import-position - import gen_protos - gen_protos.generate_proto_files() - - except ImportError: - warnings.warn("Could not import gen_protos, skipping proto generation.") + # Pyproject toml build happens in isolated environments. In those envs, + # gen_protos is unable to get imported, so we run a subprocess call. + cwd = os.path.abspath(os.path.dirname(__file__)) + out = subprocess.run([ + sys.executable, + os.path.join(cwd, 'gen_protos.py'), + '--no-force' + ], capture_output=True, check=True) + print(out.stdout) + except subprocess.CalledProcessError as err: + raise RuntimeError('Could not generate protos due to error: %s', + err.stderr) def get_portability_package_data(): @@ -188,6 +194,27 @@ def get_portability_package_data(): # structure must exist before the call to setuptools.find_packages() # executes below. generate_protos_first() + + # generate cythonize extensions only if we are building a wheel or + # building an extension or running in editable mode. + cythonize_cmds = ('bdist_wheel', 'build_ext', 'editable_wheel') + if any(cmd in sys.argv for cmd in cythonize_cmds): + extensions = cythonize([ + 'apache_beam/**/*.pyx', + 'apache_beam/coders/coder_impl.py', + 'apache_beam/metrics/cells.py', + 'apache_beam/metrics/execution.py', + 'apache_beam/runners/common.py', + 'apache_beam/runners/worker/logger.py', + 'apache_beam/runners/worker/opcounters.py', + 'apache_beam/runners/worker/operations.py', + 'apache_beam/transforms/cy_combiners.py', + 'apache_beam/transforms/stats.py', + 'apache_beam/utils/counters.py', + 'apache_beam/utils/windowed_value.py', + ]) + else: + extensions = [] # Keep all dependencies inlined in the setup call, otherwise Dependabot won't # be able to parse it. setuptools.setup( @@ -213,21 +240,7 @@ def get_portability_package_data(): *get_portability_package_data() ] }, - ext_modules=cythonize([ - 'apache_beam/**/*.pyx', - 'apache_beam/coders/coder_impl.py', - 'apache_beam/metrics/cells.py', - 'apache_beam/metrics/execution.py', - 'apache_beam/runners/common.py', - 'apache_beam/runners/worker/logger.py', - 'apache_beam/runners/worker/opcounters.py', - 'apache_beam/runners/worker/operations.py', - 'apache_beam/transforms/cy_combiners.py', - 'apache_beam/transforms/stats.py', - 'apache_beam/utils/counters.py', - 'apache_beam/utils/windowed_value.py', - ], - language_level=3), + ext_modules=extensions, install_requires=[ 'crcmod>=1.7,<2.0', 'orjson>=3.9.7,<4', @@ -250,7 +263,7 @@ def get_portability_package_data(): 'js2py>=0.74,<1', # numpy can have breaking changes in minor versions. # Use a strict upper bound. - 'numpy>=1.14.3,<1.25.0', # Update build-requirements.txt as well. + 'numpy>=1.14.3,<1.25.0', # Update pyproject.toml as well.
'objsize>=0.6.1,<0.7.0', 'packaging>=22.0', 'pymongo>=3.8.0,<5.0.0', diff --git a/sdks/python/test-suites/tox/common.gradle b/sdks/python/test-suites/tox/common.gradle index ee183dff4064..3fdd0c0c553b 100644 --- a/sdks/python/test-suites/tox/common.gradle +++ b/sdks/python/test-suites/tox/common.gradle @@ -29,18 +29,12 @@ test.dependsOn "testPy${pythonVersionSuffix}Cloud" // toxTask "testPy${pythonVersionSuffix}Dask", "py${pythonVersionSuffix}-dask", "${posargs}" // test.dependsOn "testPy${pythonVersionSuffix}Dask" -toxTask "testPy${pythonVersionSuffix}Cython", "py${pythonVersionSuffix}-cython", "${posargs}" -test.dependsOn "testPy${pythonVersionSuffix}Cython" toxTask "testPy38CloudCoverage", "py38-cloudcoverage", "${posargs}" test.dependsOn "testPy38CloudCoverage" project.tasks.register("preCommitPy${pythonVersionSuffix}") { - // Since codecoverage reports will always be generated for py38, - // all tests will be exercised. - if (pythonVersionSuffix.equals('38')) { - dependsOn = ["testPy38Cython"] - } else { - dependsOn = ["testPy${pythonVersionSuffix}Cloud", "testPy${pythonVersionSuffix}Cython"] - } + // Since codecoverage reports will always be generated for py38, + // all tests will be exercised. + dependsOn = ["testPy${pythonVersionSuffix}Cloud", "testPython${pythonVersionSuffix}"] } \ No newline at end of file diff --git a/sdks/python/test-suites/tox/py310/build.gradle b/sdks/python/test-suites/tox/py310/build.gradle index ea10fde831c6..f1e40a17951f 100644 --- a/sdks/python/test-suites/tox/py310/build.gradle +++ b/sdks/python/test-suites/tox/py310/build.gradle @@ -28,5 +28,3 @@ pythonVersion = '3.10' apply from: "../common.gradle" -// TODO(https://github.com/apache/beam/issues/20051): Remove this once tox uses isolated builds. -testPy310Cython.mustRunAfter testPython310, testPy310Cloud diff --git a/sdks/python/test-suites/tox/py311/build.gradle b/sdks/python/test-suites/tox/py311/build.gradle index 1bb3766500bb..fabf9fd4365a 100644 --- a/sdks/python/test-suites/tox/py311/build.gradle +++ b/sdks/python/test-suites/tox/py311/build.gradle @@ -28,5 +28,3 @@ pythonVersion = '3.11' apply from: "../common.gradle" -// TODO(https://github.com/apache/beam/issues/20051): Remove this once tox uses isolated builds. -testPy311Cython.mustRunAfter testPython311, testPy311Cloud diff --git a/sdks/python/test-suites/tox/py38/build.gradle b/sdks/python/test-suites/tox/py38/build.gradle index bc4aa99c79b4..b1ed5f88c7c9 100644 --- a/sdks/python/test-suites/tox/py38/build.gradle +++ b/sdks/python/test-suites/tox/py38/build.gradle @@ -43,8 +43,6 @@ lint.dependsOn mypyPy38 apply from: "../common.gradle" -// TODO(https://github.com/apache/beam/issues/20051): Remove this once tox uses isolated builds. -testPy38Cython.mustRunAfter testPython38, testPy38CloudCoverage // PyCoverage Precommit runs test suites that evaluate test coverage and compatibility of diff --git a/sdks/python/test-suites/tox/py39/build.gradle b/sdks/python/test-suites/tox/py39/build.gradle index 380cc1486daa..5bb73b60a5d2 100644 --- a/sdks/python/test-suites/tox/py39/build.gradle +++ b/sdks/python/test-suites/tox/py39/build.gradle @@ -27,6 +27,3 @@ applyPythonNature() pythonVersion = '3.9' apply from: "../common.gradle" - -// TODO(https://github.com/apache/beam/issues/20051): Remove this once tox uses isolated builds. 
-testPy39Cython.mustRunAfter testPython39, testPy39Cloud diff --git a/sdks/python/tox.ini b/sdks/python/tox.ini index 1caf25caf080..1e797d96074f 100644 --- a/sdks/python/tox.ini +++ b/sdks/python/tox.ini @@ -17,7 +17,7 @@ [tox] # new environments will be excluded by default unless explicitly added to envlist. -envlist = py38,py39,py310,py311,py38-{cloud,cython,docs,lint,mypy,cloudcoverage,dask},py39-{cloud,cython},py310-{cloud,cython,dask},py311-{cloud,cython,dask},whitespacelint +envlist = py38,py39,py310,py311,py38-{cloud,docs,lint,mypy,cloudcoverage,dask},py39-{cloud},py310-{cloud,dask},py311-{cloud,dask},whitespacelint toxworkdir = {toxinidir}/target/{env:ENV_NAME:.tox} [pycodestyle] @@ -44,9 +44,6 @@ allowlist_externals = curl ./codecov chmod -deps = - cython: cython==0.29.33 - -r build-requirements.txt setenv = RUN_SKIPPED_PY3_TESTS=0 # Use an isolated tmp dir for tests that get slowed down by scanning /tmp. @@ -67,6 +64,7 @@ commands_pre = bash {toxinidir}/scripts/run_tox_cleanup.sh commands_post = bash {toxinidir}/scripts/run_tox_cleanup.sh + commands = false {envname} is misconfigured [testenv:py{38,39,310,311}] @@ -81,28 +79,18 @@ commands = install_command = {envbindir}/python.exe {envbindir}/pip.exe install --retries 10 {opts} {packages} list_dependencies_command = {envbindir}/python.exe {envbindir}/pip.exe freeze -[testenv:py{38,39,310,311}-cython] -# cython tests are only expected to work in linux (2.x and 3.x) -# If we want to add other platforms in the future, it should be: -# `platform = linux2|darwin|...` -# See https://docs.python.org/2/library/sys.html#sys.platform for platform codes -platform = linux -commands = - # TODO(https://github.com/apache/beam/issues/20051): Remove this build_ext invocation once local source no longer - # shadows the installed apache_beam. - python setup.py build_ext --inplace - python apache_beam/examples/complete/autocomplete_test.py - bash {toxinidir}/scripts/run_pytest.sh {envname} "{posargs}" - [testenv:py{38,39,310,311}-cloud] +; extras = test,gcp,interactive,dataframe,aws,azure extras = test,gcp,interactive,dataframe,aws,azure commands = + python apache_beam/examples/complete/autocomplete_test.py bash {toxinidir}/scripts/run_pytest.sh {envname} "{posargs}" [testenv:py{38,39,310,311}-dask] extras = test,dask commands = bash {toxinidir}/scripts/run_pytest.sh {envname} "{posargs}" + [testenv:py38-cloudcoverage] deps = pytest-cov==3.0.0 @@ -124,7 +112,6 @@ commands = setenv = # keep the version of pylint in sync with the 'rev' in .pre-commit-config.yaml deps = - -r build-requirements.txt astroid<2.17.0,>=2.15.6 pycodestyle==2.8.0 pylint==2.17.5 @@ -143,7 +130,6 @@ commands = [testenv:py38-mypy] deps = - -r build-requirements.txt mypy==0.790 dask==2022.01.0 distributed==2022.01.0 @@ -173,7 +159,6 @@ commands = # Used by hdfs_integration_test.sh. Do not run this directly, as it depends on # nodes defined in hdfs_integration_test/docker-compose.yml. deps = - -r build-requirements.txt holdup==1.8.0 extras = gcp @@ -206,7 +191,6 @@ commands_pre = # Do not run this directly, as it depends on nodes defined in # azure/integration_test/docker-compose.yml. 
deps = - -r build-requirements.txt extras = azure passenv = REQUESTS_CA_BUNDLE @@ -335,7 +319,6 @@ commands = [testenv:py{38,39,310,311}-pytorch-{19,110,111,112,113}] deps = - -r build-requirements.txt 19: torch>=1.9.0,<1.10.0 110: torch>=1.10.0,<1.11.0 111: torch>=1.11.0,<1.12.0 @@ -353,7 +336,6 @@ commands = [testenv:py{38,39,310}-pytorch-200] deps = - -r build-requirements.txt 200: torch>=2.0.0,<2.1.0 extras = test,gcp # Don't set TMPDIR to avoid "AF_UNIX path too long" errors in certain tests. @@ -387,7 +369,6 @@ commands = [testenv:py{38,39,310}-tensorflow-212] deps = - -r build-requirements.txt 212: tensorflow>=2.12rc1,<2.13 extras = test,gcp commands = @@ -399,7 +380,6 @@ commands = [testenv:py{38,39,310}-xgboost-{160,170}] deps = - -r build-requirements.txt 160: xgboost>=1.6.0,<1.7.0 datatable==1.0.0 @@ -416,7 +396,6 @@ commands = [testenv:py{38,39,310,311}-transformers-{428,429,430}] deps = - -r build-requirements.txt 428: transformers>=4.28.0,<4.29.0 429: transformers>=4.29.0,<4.30.0 430: transformers>=4.30.0,<4.31.0 @@ -434,7 +413,6 @@ commands = [testenv:py{38,311}-vertex-ai] deps = - -r build-requirements.txt tensorflow==2.12.0 extras = test,gcp commands = diff --git a/website/www/site/content/en/documentation/ml/multi-language-inference.md b/website/www/site/content/en/documentation/ml/multi-language-inference.md index 0d7a972e0765..1480b37ab484 100644 --- a/website/www/site/content/en/documentation/ml/multi-language-inference.md +++ b/website/www/site/content/en/documentation/ml/multi-language-inference.md @@ -99,7 +99,7 @@ Finally, we postprocess the model predictions in the `Postprocess` DoFn. The `Po The custom Python code needs to be written in a local package and be compiled as a tarball. This package can then be used by the Java pipeline. The following example shows how to compile the Python package into a tarball: ```bash - python setup.py sdist + pip install --upgrade build && python -m build --sdist ``` In order to run this, a `setup.py` is required. The path to the tarball will be used as an argument in the pipeline options of the Java pipeline. diff --git a/website/www/site/content/en/documentation/sdks/python-pipeline-dependencies.md b/website/www/site/content/en/documentation/sdks/python-pipeline-dependencies.md index 378032ab6b58..c99c0b9c7cf8 100644 --- a/website/www/site/content/en/documentation/sdks/python-pipeline-dependencies.md +++ b/website/www/site/content/en/documentation/sdks/python-pipeline-dependencies.md @@ -66,16 +66,17 @@ If your pipeline uses packages that are not available publicly (e.g. packages th This command lists all packages that are installed on your machine, regardless of where they were installed from. -2. Run your pipeline with the following command-line option: + 1. Run your pipeline with the following command-line option: - --extra_package /path/to/package/package-name + --extra_package /path/to/package/package-name - where package-name is the package's tarball. If you have the `setup.py` for that - package then you can build the tarball with the following command: + where package-name is the package's tarball. You can build the package tarball using a command line tool called [build](https://setuptools.pypa.io/en/latest/userguide/quickstart.html#install-build). - python setup.py sdist + # Install build using pip + pip install --upgrade build + python -m build --sdist - See the [sdist documentation](https://docs.python.org/3/distutils/sourcedist.html) for more details on this command. 
+ See the [build documentation](https://pypa-build.readthedocs.io/en/latest/index.html) for more details on this command. ## Multiple File Dependencies From 05861613f484a6159c21796006c52d8da5e10b2a Mon Sep 17 00:00:00 2001 From: Rebecca Szper <98840847+rszper@users.noreply.github.com> Date: Thu, 12 Oct 2023 13:20:39 -0700 Subject: [PATCH 13/13] Fix typo and remove extra button (#28965) * Fix typo and remove extra button * Remove button cell --- examples/notebooks/beam-ml/mltransform_basic.ipynb | 12 +----------- 1 file changed, 1 insertion(+), 11 deletions(-) diff --git a/examples/notebooks/beam-ml/mltransform_basic.ipynb b/examples/notebooks/beam-ml/mltransform_basic.ipynb index fd305bddb3ba..e44be91fe1cd 100644 --- a/examples/notebooks/beam-ml/mltransform_basic.ipynb +++ b/examples/notebooks/beam-ml/mltransform_basic.ipynb @@ -1,15 +1,5 @@ { "cells": [ - { - "cell_type": "markdown", - "metadata": { - "id": "view-in-github", - "colab_type": "text" - }, - "source": [ - "\"Open" - ] - }, { "cell_type": "code", "source": [ @@ -77,7 +67,7 @@ "id": "f0097dbd-2657-4cbe-a334-e0401816db01" }, "source": [ - "## Import the requried modules\n", + "## Import the required modules\n", "\n", "To use `MLTransfrom`, install `tensorflow_transform` and the Apache Beam SDK version 2.50.0 or later.\n" ]
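A hedged setup line matching the notebook's stated requirements; the `gcp` extra and the quoting are assumptions, not part of the notebook text:

```bash
# The notebook needs tensorflow_transform and Beam SDK 2.50.0 or later.
pip install 'apache-beam[gcp]>=2.50.0' tensorflow-transform
```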