Merge branch 'master' into changelog
damccorm authored Dec 2, 2024
2 parents d73e6cd + f28e65f commit 2c5957a
Showing 348 changed files with 15,638 additions and 6,639 deletions.
1 change: 1 addition & 0 deletions .asf.yaml
@@ -49,6 +49,7 @@ github:

protected_branches:
master: {}
release-2.61.0: {}
release-2.60.0: {}
release-2.59.0: {}
release-2.58.1: {}
6 changes: 6 additions & 0 deletions .github/REVIEWERS.yml
@@ -61,6 +61,12 @@ labels:
reviewers:
- svetakvsundhar
exclusionList: []
- name: kafka
reviewers:
- johnjcasey
- fozzie15
- Dippatel98
- sjvanrossum
- name: Build
reviewers:
- damccorm
(file path not shown)
@@ -39,4 +39,7 @@ spec:
scaleDownThreshold: '0.25'
scaleUpFactor: '2'
scaleDownFactor: '0.5'
- type: TotalNumberOfQueuedAndInProgressWorkflowRuns
repositoryNames:
- beam
%{~ endif ~}
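For context, these autoscaling entries are metrics on an actions-runner-controller HorizontalRunnerAutoscaler. A minimal sketch of where they sit, assuming the upstream summerwind CRD fields and an illustrative resource name (the enclosing metric type for the scale factors is also an assumption, since it falls above the visible hunk):

```yaml
# Sketch only: field names follow the actions-runner-controller CRD;
# metadata/scaleTargetRef names are placeholders, not Beam's actual deployment.
apiVersion: actions.summerwind.dev/v1alpha1
kind: HorizontalRunnerAutoscaler
metadata:
  name: beam-runner-autoscaler        # hypothetical name
spec:
  scaleTargetRef:
    name: beam-runner-deployment      # hypothetical name
  metrics:
    - type: PercentageRunnersBusy     # assumed enclosing metric for the factors above
      scaleDownThreshold: '0.25'
      scaleUpFactor: '2'
      scaleDownFactor: '0.5'
    - type: TotalNumberOfQueuedAndInProgressWorkflowRuns
      repositoryNames:
        - beam
```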
17 changes: 17 additions & 0 deletions .github/gh-actions-self-hosted-runners/arc/environments/beam.env
@@ -81,5 +81,22 @@ additional_runner_pools = [{
labels = ["self-hosted", "ubuntu-20.04", "highmem"]
enable_selector = true
enable_taint = true
},
{
name = "highmem-runner-22"
machine_type = "c3-highmem-22"
runner_image = "us-central1-docker.pkg.dev/apache-beam-testing/beam-github-actions/beam-arc-runner:3063b55757509dad1c14751c9f2aa5905826d9a0"
min_node_count = "0"
max_node_count = "2"
min_replicas = "0"
max_replicas = "2"
webhook_scaling = false
requests = {
cpu = "7.5"
memory = "100Gi"
}
labels = ["self-hosted", "ubuntu-20.04", "highmem22"]
enable_selector = true
enable_taint = true
}]
#state_bucket_name = "beam-arc-state"
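The new pool advertises the label highmem22, which is how workflow jobs select it; the Python Examples Flink workflow later in this diff does exactly that. A minimal sketch of the job side, with the job name assumed for illustration:

```yaml
# Sketch: a job opts into the new c3-highmem-22 pool purely via its runner labels.
jobs:
  example_highmem_job:              # hypothetical job name
    runs-on: [self-hosted, ubuntu-20.04, highmem22]
    steps:
      - uses: actions/checkout@v4
```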
2 changes: 1 addition & 1 deletion .github/trigger_files/IO_Iceberg_Integration_Tests.json
@@ -1,4 +1,4 @@
{
"comment": "Modify this file in a trivial way to cause this test suite to run",
"modification": 3
"modification": 2
}
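These trigger files work because each workflow lists its own trigger file under pull_request_target paths, so a trivial edit to the JSON re-runs the suite on a pull request (the same pattern appears in the workflow changes further down this diff). A minimal sketch, with the workflow name assumed:

```yaml
# Sketch: editing the JSON listed under the paths filter causes this suite to run on a PR.
name: IO Iceberg Integration Tests   # hypothetical header for illustration
on:
  pull_request_target:
    paths:
      - '.github/trigger_files/IO_Iceberg_Integration_Tests.json'
  workflow_dispatch:
```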
5 changes: 4 additions & 1 deletion .github/trigger_files/beam_PostCommit_Java.json
@@ -1 +1,4 @@
{}
{
"comment": "Modify this file in a trivial way to cause this test suite to run",
"modification": 1
}
4 changes: 4 additions & 0 deletions .github/trigger_files/beam_PostCommit_Java_Avro_Versions.json
@@ -0,0 +1,4 @@
{
"comment": "Modify this file in a trivial way to cause this test suite to run",
"modification": 2
}
3 changes: 2 additions & 1 deletion .github/trigger_files/beam_PostCommit_Java_DataflowV2.json
@@ -1,3 +1,4 @@
{
"comment": "Modify this file in a trivial way to cause this test suite to run"
"comment": "Modify this file in a trivial way to cause this test suite to run",
"modification": 2
}
(file path not shown)
@@ -0,0 +1 @@
{}
4 changes: 4 additions & 0 deletions .github/trigger_files/beam_PostCommit_Java_Nexmark_Flink.json
@@ -0,0 +1,4 @@
{
"comment": "Modify this file in a trivial way to cause this test suite to run",
"runFor": "#33146"
}
(file path not shown)
@@ -0,0 +1,4 @@
{
"comment": "Modify this file in a trivial way to cause this test suite to run",
"modification": 1
}
(file path not shown)
@@ -1,5 +1,4 @@
{
"comment": "Modify this file in a trivial way to cause this test suite to run",
"https://github.com/apache/beam/pull/31156": "noting that PR #31156 should run this test",
"https://github.com/apache/beam/pull/32648": "testing addition of Flink 1.19 support"
"runFor": "#33146"
}
2 changes: 1 addition & 1 deletion .github/trigger_files/beam_PostCommit_Python.json
@@ -1,5 +1,5 @@
{
"comment": "Modify this file in a trivial way to cause this test suite to run.",
"modification": 3
"modification": 5
}

4 changes: 4 additions & 0 deletions .github/trigger_files/beam_PostCommit_Python_Dependency.json
@@ -0,0 +1,4 @@
{
"comment": "Modify this file in a trivial way to cause this test suite to run",
"modification": 1
}
(file path not shown)
@@ -0,0 +1,4 @@
{
"comment": "Modify this file in a trivial way to cause this test suite to run",
"modification": 2
}
(file path not shown)
@@ -1,5 +1,4 @@

{
"comment": "Modify this file in a trivial way to cause this test suite to run",
"modification": 1
"modification": 2
}
(file path not shown)
@@ -1,4 +1,4 @@
{
"comment": "Modify this file in a trivial way to cause this test suite to run",
"modification": 2
"modification": 3
}
(file path not shown)
@@ -1,4 +1,4 @@
{
"comment": "Modify this file in a trivial way to cause this test suite to run",
"modification": 1
"modification": 2
}
(file path not shown)
@@ -1,4 +1,4 @@
{
"comment": "Modify this file in a trivial way to cause this test suite to run",
"revision: "1"
"revision": 3
}
(file path not shown)
@@ -0,0 +1 @@
{}
1 change: 1 addition & 0 deletions .github/trigger_files/beam_PostCommit_XVR_Samza.json
@@ -0,0 +1 @@
{}
4 changes: 4 additions & 0 deletions .github/trigger_files/beam_PreCommit_Flink_Container.json
@@ -0,0 +1,4 @@
{
"comment": "Modify this file in a trivial way to cause this test suite to run",
"modification": 2
}
2 changes: 1 addition & 1 deletion .github/workflows/IO_Iceberg_Integration_Tests.yml
@@ -75,4 +75,4 @@ jobs:
- name: Run IcebergIO Integration Test
uses: ./.github/actions/gradle-command-self-hosted-action
with:
gradle-command: :sdks:java:io:iceberg:catalogTests
gradle-command: :sdks:java:io:iceberg:catalogTests --info
2 changes: 0 additions & 2 deletions .github/workflows/README.md
@@ -331,7 +331,6 @@ PostCommit Jobs run in a schedule against master branch and generally do not get
| [ PostCommit Java SingleStoreIO IT ](https://github.com/apache/beam/actions/workflows/beam_PostCommit_Java_SingleStoreIO_IT.yml) | N/A |`beam_PostCommit_Java_SingleStoreIO_IT.json`| [![.github/workflows/beam_PostCommit_Java_SingleStoreIO_IT.yml](https://github.com/apache/beam/actions/workflows/beam_PostCommit_Java_SingleStoreIO_IT.yml/badge.svg?event=schedule)](https://github.com/apache/beam/actions/workflows/beam_PostCommit_Java_SingleStoreIO_IT.yml?query=event%3Aschedule) |
| [ PostCommit Java PVR Spark3 Streaming ](https://github.com/apache/beam/actions/workflows/beam_PostCommit_Java_PVR_Spark3_Streaming.yml) | N/A |`beam_PostCommit_Java_PVR_Spark3_Streaming.json`| [![.github/workflows/beam_PostCommit_Java_PVR_Spark3_Streaming.yml](https://github.com/apache/beam/actions/workflows/beam_PostCommit_Java_PVR_Spark3_Streaming.yml/badge.svg?event=schedule)](https://github.com/apache/beam/actions/workflows/beam_PostCommit_Java_PVR_Spark3_Streaming.yml?query=event%3Aschedule) |
| [ PostCommit Java PVR Spark Batch ](https://github.com/apache/beam/actions/workflows/beam_PostCommit_Java_PVR_Spark_Batch.yml) | N/A |`beam_PostCommit_Java_PVR_Spark_Batch.json`| [![.github/workflows/beam_PostCommit_Java_PVR_Spark_Batch.yml](https://github.com/apache/beam/actions/workflows/beam_PostCommit_Java_PVR_Spark_Batch.yml/badge.svg?event=schedule)](https://github.com/apache/beam/actions/workflows/beam_PostCommit_Java_PVR_Spark_Batch.yml?query=event%3Aschedule) |
| [ PostCommit Java Sickbay ](https://github.com/apache/beam/actions/workflows/beam_PostCommit_Java_Sickbay.yml) | N/A |`beam_PostCommit_Java_Sickbay.json`| [![.github/workflows/beam_PostCommit_Java_Sickbay.yml](https://github.com/apache/beam/actions/workflows/beam_PostCommit_Java_Sickbay.yml/badge.svg?event=schedule)](https://github.com/apache/beam/actions/workflows/beam_PostCommit_Java_Sickbay.yml?query=event%3Aschedule) |
| [ PostCommit Java Tpcds Dataflow ](https://github.com/apache/beam/actions/workflows/beam_PostCommit_Java_Tpcds_Dataflow.yml) | N/A |`beam_PostCommit_Java_Tpcds_Dataflow.json`| [![.github/workflows/beam_PostCommit_Java_Tpcds_Dataflow.yml](https://github.com/apache/beam/actions/workflows/beam_PostCommit_Java_Tpcds_Dataflow.yml/badge.svg?event=schedule)](https://github.com/apache/beam/actions/workflows/beam_PostCommit_Java_Tpcds_Dataflow.yml?query=event%3Aschedule) |
| [ PostCommit Java Tpcds Flink ](https://github.com/apache/beam/actions/workflows/beam_PostCommit_Java_Tpcds_Flink.yml) | N/A |`beam_PostCommit_Java_Tpcds_Flink.json`| [![.github/workflows/beam_PostCommit_Java_Tpcds_Flink.yml](https://github.com/apache/beam/actions/workflows/beam_PostCommit_Java_Tpcds_Flink.yml/badge.svg?event=schedule)](https://github.com/apache/beam/actions/workflows/beam_PostCommit_Java_Tpcds_Flink.yml?query=event%3Aschedule) |
| [ PostCommit Java Tpcds Spark ](https://github.com/apache/beam/actions/workflows/beam_PostCommit_Java_Tpcds_Spark.yml) | N/A |`beam_PostCommit_Java_Tpcds_Spark.json`| [![.github/workflows/beam_PostCommit_Java_Tpcds_Spark.yml](https://github.com/apache/beam/actions/workflows/beam_PostCommit_Java_Tpcds_Spark.yml/badge.svg?event=schedule)](https://github.com/apache/beam/actions/workflows/beam_PostCommit_Java_Tpcds_Spark.yml?query=event%3Aschedule) |
@@ -372,7 +371,6 @@ PostCommit Jobs run in a schedule against master branch and generally do not get
| [ PostCommit Python Xlang Gcp Dataflow ](https://github.com/apache/beam/actions/workflows/beam_PostCommit_Python_Xlang_Gcp_Dataflow.yml) | N/A |`beam_PostCommit_Python_Xlang_Gcp_Dataflow.json`| [![.github/workflows/beam_PostCommit_Python_Xlang_Gcp_Dataflow.yml](https://github.com/apache/beam/actions/workflows/beam_PostCommit_Python_Xlang_Gcp_Dataflow.yml/badge.svg?event=schedule)](https://github.com/apache/beam/actions/workflows/beam_PostCommit_Python_Xlang_Gcp_Dataflow.yml?query=event%3Aschedule) |
| [ PostCommit Python Xlang Gcp Direct ](https://github.com/apache/beam/actions/workflows/beam_PostCommit_Python_Xlang_Gcp_Direct.yml) | N/A |`beam_PostCommit_Python_Xlang_Gcp_Direct.json`| [![.github/workflows/beam_PostCommit_Python_Xlang_Gcp_Direct.yml](https://github.com/apache/beam/actions/workflows/beam_PostCommit_Python_Xlang_Gcp_Direct.yml/badge.svg?event=schedule)](https://github.com/apache/beam/actions/workflows/beam_PostCommit_Python_Xlang_Gcp_Direct.yml?query=event%3Aschedule) |
| [ PostCommit Python Xlang IO Dataflow ](https://github.com/apache/beam/actions/workflows/beam_PostCommit_Python_Xlang_IO_Dataflow.yml) | N/A |`beam_PostCommit_Python_Xlang_IO_Dataflow.json`| [![.github/workflows/beam_PostCommit_Python_Xlang_IO_Dataflow.yml](https://github.com/apache/beam/actions/workflows/beam_PostCommit_Python_Xlang_IO_Dataflow.yml/badge.svg?event=schedule)](https://github.com/apache/beam/actions/workflows/beam_PostCommit_Python_Xlang_IO_Dataflow.yml?query=event%3Aschedule) |
| [ PostCommit Sickbay Python ](https://github.com/apache/beam/actions/workflows/beam_PostCommit_Sickbay_Python.yml) | ['3.8','3.9','3.10','3.11'] |`beam_PostCommit_Sickbay_Python.json`| [![.github/workflows/beam_PostCommit_Sickbay_Python.yml](https://github.com/apache/beam/actions/workflows/beam_PostCommit_Sickbay_Python.yml/badge.svg?event=schedule)](https://github.com/apache/beam/actions/workflows/beam_PostCommit_Sickbay_Python.yml?query=event%3Aschedule) |
| [ PostCommit SQL ](https://github.com/apache/beam/actions/workflows/beam_PostCommit_SQL.yml) | N/A |`beam_PostCommit_SQL.json`| [![.github/workflows/beam_PostCommit_SQL.yml](https://github.com/apache/beam/actions/workflows/beam_PostCommit_SQL.yml/badge.svg?event=schedule)](https://github.com/apache/beam/actions/workflows/beam_PostCommit_SQL.yml?query=event%3Aschedule) |
| [ PostCommit TransformService Direct ](https://github.com/apache/beam/actions/workflows/beam_PostCommit_TransformService_Direct.yml) | N/A |`beam_PostCommit_TransformService_Direct.json`| [![.github/workflows/beam_PostCommit_TransformService_Direct.yml](https://github.com/apache/beam/actions/workflows/beam_PostCommit_TransformService_Direct.yml/badge.svg?event=schedule)](https://github.com/apache/beam/actions/workflows/beam_PostCommit_TransformService_Direct.yml?query=event%3Aschedule)
| [ PostCommit Website Test](https://github.com/apache/beam/actions/workflows/beam_PostCommit_Website_Test.yml) | N/A |`beam_PostCommit_Website_Test.json`| [![.github/workflows/beam_PostCommit_Website_Test.yml](https://github.com/apache/beam/actions/workflows/beam_PostCommit_Website_Test.yml/badge.svg?event=schedule)](https://github.com/apache/beam/actions/workflows/beam_PostCommit_Website_Test.yml?query=event%3Aschedule) |
(file path not shown)
@@ -65,7 +65,7 @@ jobs:
(github.event_name == 'schedule' && github.repository == 'apache/beam') ||
github.event.comment.body == 'Run Load Tests Python Combine Flink Streaming'
runs-on: [self-hosted, ubuntu-20.04, main]
timeout-minutes: 720
timeout-minutes: 80
name: ${{ matrix.job_name }} (${{ matrix.job_phrase }})
strategy:
matrix:
@@ -89,17 +89,22 @@ jobs:
test-type: load
test-language: python
argument-file-paths: |
${{ github.workspace }}/.github/workflows/load-tests-pipeline-options/python_Combine_Flink_Streaming_2GB_Fanout_4.txt
${{ github.workspace }}/.github/workflows/load-tests-pipeline-options/python_Combine_Flink_Streaming_2GB_Fanout_8.txt
${{ github.workspace }}/.github/workflows/load-tests-pipeline-options/python_Combine_Flink_Streaming_small_Fanout_1.txt
${{ github.workspace }}/.github/workflows/load-tests-pipeline-options/python_Combine_Flink_Streaming_small_Fanout_2.txt
# large loads do not work now
# ${{ github.workspace }}/.github/workflows/load-tests-pipeline-options/python_Combine_Flink_Streaming_2GB_Fanout_4.txt
# ${{ github.workspace }}/.github/workflows/load-tests-pipeline-options/python_Combine_Flink_Streaming_2GB_Fanout_8.txt
- name: Start Flink with parallelism 16
env:
FLINK_NUM_WORKERS: 16
HIGH_MEM_MACHINE: n1-highmem-16
HIGH_MEM_FLINK_PROPS: flink:taskmanager.memory.process.size=16g,flink:taskmanager.memory.flink.size=12g,flink:taskmanager.memory.jvm-overhead.max=4g,flink:jobmanager.memory.process.size=6g,flink:jobmanager.memory.jvm-overhead.max= 2g,flink:jobmanager.memory.flink.size=4g
run: |
cd ${{ github.workspace }}/.test-infra/dataproc; ./flink_cluster.sh create
- name: get current time
run: echo "NOW_UTC=$(date '+%m%d%H%M%S' --utc)" >> $GITHUB_ENV
# The env variables are created and populated in the test-arguments-action as "<github.job>_test_arguments_<argument_file_paths_index>"
- name: run Load test 2GB Fanout 4
# The env variables are created and populated in the test-arguments-action as "<github.job>_test_arguments_<argument_file_paths_index>"
- name: run Load test small Fanout 1
uses: ./.github/actions/gradle-command-self-hosted-action
with:
gradle-command: :sdks:python:apache_beam:testing:load_tests:run
@@ -108,7 +113,7 @@ jobs:
-PloadTest.mainClass=apache_beam.testing.load_tests.combine_test \
-Prunner=PortableRunner \
'-PloadTest.args=${{ env.beam_LoadTests_Python_Combine_Flink_Streaming_test_arguments_1 }} --job_name=load-tests-python-flink-streaming-combine-4-${{env.NOW_UTC}}' \
- name: run Load test 2GB Fanout 8
- name: run Load test small Fanout 2
uses: ./.github/actions/gradle-command-self-hosted-action
with:
gradle-command: :sdks:python:apache_beam:testing:load_tests:run
@@ -123,4 +128,4 @@
${{ github.workspace }}/.test-infra/dataproc/flink_cluster.sh delete
# // TODO(https://github.com/apache/beam/issues/20402). Skipping some cases because they are too slow:
# load-tests-python-flink-streaming-combine-1'
# load-tests-python-flink-streaming-combine-1'
(file path not shown)
@@ -15,13 +15,17 @@
# specific language governing permissions and limitations
# under the License.

name: PostCommit Java Sickbay
name: PostCommit Java ValidatesDistrolessContainer Dataflow

on:
schedule:
- cron: '30 4/6 * * *'
- cron: '30 6/8 * * *'
pull_request_target:
paths: ['.github/trigger_files/beam_PostCommit_Java_Sickbay.json']
paths:
- 'release/trigger_all_tests.json'
- '.github/trigger_files/beam_PostCommit_Java_ValidatesRunner_Dataflow_V2.json'
- '.github/trigger_files/beam_PostCommit_Java_ValidatesDistrolessContainer_Dataflow.json'

workflow_dispatch:

# This allows a subsequently queued workflow run to interrupt previous runs
@@ -51,19 +55,19 @@ env:
GRADLE_ENTERPRISE_CACHE_PASSWORD: ${{ secrets.GE_CACHE_PASSWORD }}

jobs:
beam_PostCommit_Java_Sickbay:
beam_PostCommit_Java_ValidatesDistrolessContainer_Dataflow:
name: ${{ matrix.job_name }} (${{ matrix.job_phrase }})
runs-on: [self-hosted, ubuntu-20.04, main]
timeout-minutes: 120
timeout-minutes: 390
strategy:
matrix:
job_name: [beam_PostCommit_Java_Sickbay]
job_phrase: [Run Java Sickbay]
matrix:
job_name: [beam_PostCommit_Java_ValidatesDistrolessContainer_Dataflow]
job_phrase: [Run Java Dataflow ValidatesDistrolessContainer]
if: |
github.event_name == 'workflow_dispatch' ||
github.event_name == 'pull_request_target' ||
(github.event_name == 'schedule' && github.repository == 'apache/beam') ||
github.event.comment.body == 'Run Java Sickbay'
github.event.comment.body == 'Run Java Dataflow ValidatesDistrolessContainer'
steps:
- uses: actions/checkout@v4
- name: Setup repository
@@ -74,10 +78,21 @@ jobs:
github_job: ${{ matrix.job_name }} (${{ matrix.job_phrase }})
- name: Setup environment
uses: ./.github/actions/setup-environment-action
- name: run PostCommit Java Sickbay script
with:
java-version: |
17
21
- name: Setup docker
run: |
gcloud auth configure-docker us-docker.pkg.dev --quiet
gcloud auth configure-docker us.gcr.io --quiet
gcloud auth configure-docker gcr.io --quiet
gcloud auth configure-docker us-central1-docker.pkg.dev --quiet
- name: run validatesDistrolessContainer script
uses: ./.github/actions/gradle-command-self-hosted-action
with:
gradle-command: :javaPostCommitSickbay
gradle-command: :runners:google-cloud-dataflow-java:examplesJavaRunnerV2IntegrationTestDistroless
max-workers: 12
- name: Archive JUnit Test Results
uses: actions/upload-artifact@v4
if: ${{ !success() }}
@@ -90,4 +105,4 @@ jobs:
with:
commit: '${{ env.prsha || env.GITHUB_SHA }}'
comment_mode: ${{ github.event_name == 'issue_comment' && 'always' || 'off' }}
files: '**/build/test-results/**/*.xml'
files: '**/build/test-results/**/*.xml'
4 changes: 2 additions & 2 deletions .github/workflows/beam_PostCommit_Python_Examples_Flink.yml
@@ -55,7 +55,7 @@ jobs:
github.event_name == 'workflow_dispatch' ||
github.event_name == 'pull_request_target' ||
startsWith(github.event.comment.body, 'Run Python Examples_Flink')
runs-on: [self-hosted, ubuntu-20.04, main]
runs-on: [self-hosted, ubuntu-20.04, highmem22]
timeout-minutes: 240
name: ${{ matrix.job_name }} (${{ matrix.job_phrase }} ${{ matrix.python_version }})
strategy:
@@ -101,4 +101,4 @@ jobs:
with:
commit: '${{ env.prsha || env.GITHUB_SHA }}'
comment_mode: ${{ github.event_name == 'issue_comment' && 'always' || 'off' }}
files: '**/pytest*.xml'
files: '**/pytest*.xml'