Skip to content

[Bug]: Python SDFs (e.g. PeriodicImpulse) running in Flink and polling using tracker.defer_remainder have checkpoint size growing indefinitely #2286

[Bug]: Python SDFs (e.g. PeriodicImpulse) running in Flink and polling using tracker.defer_remainder have checkpoint size growing indefinitely

[Bug]: Python SDFs (e.g. PeriodicImpulse) running in Flink and polling using tracker.defer_remainder have checkpoint size growing indefinitely #2286

# Licensed to the Apache Software Foundation (ASF) under one or more
# contributor license agreements. See the NOTICE file distributed with
# this work for additional information regarding copyright ownership.
# The ASF licenses this file to You under the Apache License, Version 2.0
# (the "License"); you may not use this file except in compliance with
# the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
name: PostCommit Java IO Performance Tests
on:
push:
tags: ['v*']
branches: ['master', 'release-*']
paths: ['it/google-cloud-platform/**','.github/workflows/beam_PostCommit_Java_IO_Performance_Tests.yml']
issue_comment:
types: [created]
schedule:
- cron: '0 */6 * * *'
workflow_dispatch:
#Setting explicit permissions for the action to avoid the default permissions which are `write-all` in case of pull_request_target event
permissions:
actions: write
pull-requests: read
checks: read
contents: read
deployments: read
id-token: none
issues: read
discussions: read
packages: read
pages: read
repository-projects: read
security-events: read
statuses: read
# This allows a subsequently queued workflow run to interrupt previous runs
concurrency:
group: '${{ github.workflow }} @ ${{ github.event.pull_request.head.label || github.sha || github.head_ref || github.ref }}-${{ github.event.sender.login }}-${{ github.event.schedule }}'
cancel-in-progress: true
env:
GRADLE_ENTERPRISE_ACCESS_KEY: ${{ secrets.GE_ACCESS_TOKEN }}
GRADLE_ENTERPRISE_CACHE_USERNAME: ${{ secrets.GE_CACHE_USERNAME }}
GRADLE_ENTERPRISE_CACHE_PASSWORD: ${{ secrets.GE_CACHE_PASSWORD }}
jobs:
beam_PostCommit_Java_IO_Performance_Tests:
if: |
github.event_name == 'push' ||
github.event_name == 'workflow_dispatch' ||
github.event_name == 'schedule' ||
github.event.comment.body == 'Run Java PostCommit IO Performance Tests'
runs-on: [self-hosted, ubuntu-20.04, main]
name: ${{ matrix.job_name }} (${{ matrix.job_phrase }} ${{ matrix.test_case }})
strategy:
matrix:
job_name: ["beam_PostCommit_Java_IO_Performance_Tests"]
job_phrase: ["Run Java PostCommit IO Performance Tests"]
test_case: ["GCSPerformanceTest", "BigTablePerformanceTest", "BigQueryStorageApiStreamingPerformanceTest"]
steps:
- uses: actions/checkout@v4
- name: Setup repository
uses: ./.github/actions/setup-action
with:
comment_phrase: ${{ matrix.job_phrase }} ${{ matrix.test_case }}
github_token: ${{ secrets.GITHUB_TOKEN }}
github_job: ${{ matrix.job_name }} (${{ matrix.job_phrase }} ${{ matrix.test_case }})
- name: Checkout release branch
if: github.event_name == 'schedule' #This has scheduled runs run against the latest release
uses: actions/checkout@v4
with:
ref: v2.50.0 #TODO(https://github.com/apache/beam/issues/28330) automate updating this
repository: apache/beam
- name: Setup environment
uses: ./.github/actions/setup-environment-action
with:
java-version: 8
- name: Authenticate on GCP
uses: google-github-actions/setup-gcloud@v0
with:
service_account_email: ${{ secrets.GCP_SA_EMAIL }}
service_account_key: ${{ secrets.GCP_SA_KEY }}
project_id: ${{ secrets.GCP_PROJECT_ID }}
export_default_credentials: true
- name: run scheduled javaPostcommitIOPerformanceTests script
if: github.event_name == 'schedule' #This ensures only scheduled runs publish metrics publicly by changing which exportTable is configured
uses: ./.github/actions/gradle-command-self-hosted-action
with:
gradle-command: :it:${{ matrix.test_case }}
env:
exportDataset: performance_tests
exportTable: io_performance_metrics
- name: run triggered javaPostcommitIOPerformanceTests script
if: github.event_name != 'schedule'
uses: ./.github/actions/gradle-command-self-hosted-action
with:
gradle-command: :it:${{ matrix.test_case }}
env:
exportDataset: performance_tests
exportTable: io_performance_metrics_test
- name: Archive JUnit Test Results
uses: actions/upload-artifact@v3
if: failure()
with:
name: JUnit Test Results
path: "**/build/reports/tests/"
- name: Publish JUnit Test Results
uses: EnricoMi/publish-unit-test-result-action@v2
if: always()
with:
commit: '${{ env.prsha || env.GITHUB_SHA }}'
comment_mode: ${{ github.event_name == 'issue_comment' && 'always' || 'off' }}
files: '**/build/test-results/**/*.xml'