# Skip to content

# PostCommit Java Tpcds Spark #1739

# PostCommit Java Tpcds Spark

# PostCommit Java Tpcds Spark #1739

# Licensed to the Apache Software Foundation (ASF) under one or more
# contributor license agreements. See the NOTICE file distributed with
# this work for additional information regarding copyright ownership.
# The ASF licenses this file to You under the Apache License, Version 2.0
# (the "License"); you may not use this file except in compliance with
# the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# Workflow name.
name: PostCommit Java Tpcds Spark

# Triggers: a cron schedule, pull requests that touch one of the listed
# trigger files, and manual dispatch.
on:
  schedule:
    # Minute 30 of the hours matched by `4/6`
    # (presumably 04:30, 10:30, 16:30 and 22:30 UTC - confirm).
    - cron: '30 4/6 * * *'
  pull_request_target:
    paths: ['release/trigger_all_tests.json', '.github/trigger_files/beam_PostCommit_Java_Tpcds_Spark.json']
  workflow_dispatch:
# Set explicit permissions for the action to avoid the default permissions,
# which are `write-all` in the case of a pull_request_target event.
# Every scope is read-only except `actions` (write) and `id-token` (none).
permissions:
  actions: write
  pull-requests: read
  checks: read
  contents: read
  deployments: read
  id-token: none
  issues: read
  discussions: read
  packages: read
  pages: read
  repository-projects: read
  security-events: read
  statuses: read
# This allows a subsequently queued workflow run to interrupt previous runs
# that resolve to the same concurrency group. The group key combines the
# workflow name, the first available of issue number / SHA / head ref / ref,
# and the first available of schedule / comment id / sender login.
concurrency:
  group: '${{ github.workflow }} @ ${{ github.event.issue.number || github.sha || github.head_ref || github.ref }}-${{ github.event.schedule || github.event.comment.id || github.event.sender.login }}'
  cancel-in-progress: true
# Workflow-level environment variables.
env:
  # Credentials are read from repository secrets.
  DEVELOCITY_ACCESS_KEY: ${{ secrets.GE_ACCESS_TOKEN }}
  GRADLE_ENTERPRISE_CACHE_USERNAME: ${{ secrets.GE_CACHE_USERNAME }}
  GRADLE_ENTERPRISE_CACHE_PASSWORD: ${{ secrets.GE_CACHE_PASSWORD }}
  # TPC-DS options (data size, source format, input and result locations);
  # interpolated into `-Ptpcds.args` by the Gradle step of the job.
  GRADLE_COMMAND_ARGUMENTS: |
    --dataSize=1GB
    --sourceType=PARQUET
    --dataDirectory=gs://beam-tpcds/datasets/parquet/nonpartitioned
    --resultsDirectory=gs://beam-tpcds/results/spark3-rdd/
    --tpcParallel=1
  INFLUXDB_USER: ${{ secrets.INFLUXDB_USER }}
  INFLUXDB_USER_PASSWORD: ${{ secrets.INFLUXDB_USER_PASSWORD }}
  # BigQuery export options, also interpolated into `-Ptpcds.args`.
  tpcdsBigQueryArgs: |
    --bigQueryTable=tpcds
    --bigQueryDataset=tpcds
    --project=apache-beam-testing
    --resourceNameMode=QUERY_RUNNER_AND_MODE
    --exportSummaryToBigQuery=true
    --tempLocation=gs://temp-storage-for-perf-tests/tpcds
  # InfluxDB export options, also interpolated into `-Ptpcds.args`.
  tpcdsInfluxDBArgs: |
    --influxDatabase=beam_test_metrics
    --influxHost=http://10.128.0.96:8086
    --baseInfluxMeasurement=tpcds
    --exportSummaryToInfluxDB=true
    --influxRetentionPolicy=forever
  # Comma-separated query numbers passed as `--queries=`; quoted so the
  # digits-and-commas value is always read as a string.
  tpcdsQueriesArg: '3,7,10,25,26,29,35,38,40,42,43,52,55,69,79,83,84,87,93,96'
jobs:
  # Runs the TPC-DS Gradle task once per entry in the `runner` matrix.
  beam_PostCommit_Java_Tpcds_Spark:
    # Run on manual dispatch, on pull_request_target events, on the schedule
    # (only in the apache/beam repository), or when the event carries the
    # matching comment body.
    # NOTE(review): the `on:` section visible in this file declares no
    # comment-based trigger, so the last clause may never match - confirm.
    if: |
      github.event_name == 'workflow_dispatch' ||
      github.event_name == 'pull_request_target' ||
      (github.event_name == 'schedule' && github.repository == 'apache/beam') ||
      github.event.comment.body == 'Run Spark Runner Tpcds Tests'
    runs-on: [self-hosted, ubuntu-20.04, main]
    timeout-minutes: 240
    name: ${{ matrix.job_name }} (${{ matrix.job_phrase }}) ${{ matrix.runner }}
    strategy:
      matrix:
        job_name: ["beam_PostCommit_Java_Tpcds_Spark"]
        job_phrase: ["Run Spark Runner Tpcds Tests"]
        # One job instance per runner listed here.
        runner: [SparkRunner, SparkStructuredStreamingRunner]
    steps:
      - uses: actions/checkout@v4
      - name: Setup repository
        uses: ./.github/actions/setup-action
        with:
          comment_phrase: ${{ matrix.job_phrase }}
          github_token: ${{ secrets.GITHUB_TOKEN }}
          # Same text as the job's display name above.
          github_job: ${{ matrix.job_name }} (${{ matrix.job_phrase }}) ${{ matrix.runner }}
      - name: Setup environment
        uses: ./.github/actions/setup-environment-action
      - name: run PostCommit Java Tpcds Spark (${{ matrix.runner }}) script
        uses: ./.github/actions/gradle-command-self-hosted-action
        with:
          gradle-command: :sdks:java:testing:tpcds:run
          # `-Ptpcds.args` joins the env-level argument blocks with the matrix
          # runner and the query list.
          # NOTE(review): the final line ends with a continuation backslash;
          # confirm no further argument lines were cut off after it.
          arguments: |
            -Ptpcds.runner=:runners:spark:3 \
            "-Ptpcds.args=${{env.tpcdsBigQueryArgs}} ${{env.tpcdsInfluxDBArgs}} ${{ env.GRADLE_COMMAND_ARGUMENTS }} --runner=${{ matrix.runner }} --queries=${{env.tpcdsQueriesArg}}" \