Skip to content

Commit

Permalink
Add Inference Python Benchmarks Dataflow workflow (apache#28943)
Browse files Browse the repository at this point in the history
  • Loading branch information
Amar3tto authored and damondouglas committed Oct 16, 2023
1 parent 09cc368 commit 214f99a
Show file tree
Hide file tree
Showing 6 changed files with 316 additions and 0 deletions.
144 changes: 144 additions & 0 deletions .github/workflows/beam_Inference_Python_Benchmarks_Dataflow.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,144 @@
# Licensed to the Apache Software Foundation (ASF) under one or more
# contributor license agreements. See the NOTICE file distributed with
# this work for additional information regarding copyright ownership.
# The ASF licenses this file to You under the Apache License, Version 2.0
# (the "License"); you may not use this file except in compliance with
# the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

name: Inference Python Benchmarks Dataflow

on:
issue_comment:
types: [created]
schedule:
- cron: '50 3 * * *'
workflow_dispatch:

#Setting explicit permissions for the action to avoid the default permissions which are `write-all` in case of pull_request_target event
permissions:
actions: write
pull-requests: read
checks: read
contents: read
deployments: read
id-token: none
issues: read
discussions: read
packages: read
pages: read
repository-projects: read
security-events: read
statuses: read

# This allows a subsequently queued workflow run to interrupt previous runs
concurrency:
group: '${{ github.workflow }} @ ${{ github.event.issue.number || github.sha || github.head_ref || github.ref }}-${{ github.event.schedule || github.event.comment.body || github.event.sender.login }}'
cancel-in-progress: true

env:
GRADLE_ENTERPRISE_ACCESS_KEY: ${{ secrets.GE_ACCESS_TOKEN }}
GRADLE_ENTERPRISE_CACHE_USERNAME: ${{ secrets.GE_CACHE_USERNAME }}
GRADLE_ENTERPRISE_CACHE_PASSWORD: ${{ secrets.GE_CACHE_PASSWORD }}

jobs:
beam_Inference_Python_Benchmarks_Dataflow:
if: |
github.event_name == 'workflow_dispatch' ||
github.event_name == 'schedule' ||
github.event.comment.body == 'Run Inference Benchmarks'
runs-on: [self-hosted, ubuntu-20.04, main]
timeout-minutes: 900
name: ${{ matrix.job_name }} (${{ matrix.job_phrase }})
strategy:
matrix:
job_name: ["beam_Inference_Python_Benchmarks_Dataflow"]
job_phrase: ["Run Inference Benchmarks"]
steps:
- uses: actions/checkout@v3
- name: Setup repository
uses: ./.github/actions/setup-action
with:
comment_phrase: ${{ matrix.job_phrase }}
github_token: ${{ secrets.GITHUB_TOKEN }}
github_job: ${{ matrix.job_name }} (${{ matrix.job_phrase }})
- name: Setup Python environment
uses: ./.github/actions/setup-environment-action
with:
python-version: '3.8'
- name: Prepare test arguments
uses: ./.github/actions/test-arguments-action
with:
test-type: load
test-language: python
argument-file-paths: |
${{ github.workspace }}/.github/workflows/load-tests-job-configs/beam_Inference_Python_Benchmarks_Dataflow_Pytorch_Vision_Classification_Resnet_101.txt
${{ github.workspace }}/.github/workflows/load-tests-job-configs/beam_Inference_Python_Benchmarks_Dataflow_Pytorch_Imagenet_Classification_Resnet_152.txt
${{ github.workspace }}/.github/workflows/load-tests-job-configs/beam_Inference_Python_Benchmarks_Dataflow_Pytorch_Language_Modeling_Bert_Base_Uncased.txt
${{ github.workspace }}/.github/workflows/load-tests-job-configs/beam_Inference_Python_Benchmarks_Dataflow_Pytorch_Language_Modeling_Bert_Large_Uncased.txt
${{ github.workspace }}/.github/workflows/load-tests-job-configs/beam_Inference_Python_Benchmarks_Dataflow_Pytorch_Imagenet_Classification_Resnet_152_Tesla_T4_GPU.txt
# The env variables are created and populated in the test-arguments-action as "<github.job>_test_arguments_<argument_file_paths_index>"
- name: get current time
run: echo "NOW_UTC=$(date '+%m%d%H%M%S' --utc)" >> $GITHUB_ENV
- name: run Pytorch Vision Classification with Resnet 101
uses: ./.github/actions/gradle-command-self-hosted-action
timeout-minutes: 180
with:
gradle-command: :sdks:python:apache_beam:testing:load_tests:run
arguments: |
-PloadTest.mainClass=apache_beam.testing.benchmarks.inference.pytorch_image_classification_benchmarks \
-Prunner=DataflowRunner \
-PpythonVersion=3.8 \
-PloadTest.requirementsTxtFile=apache_beam/ml/inference/torch_tests_requirements.txt \
'-PloadTest.args=${{ env.beam_Inference_Python_Benchmarks_Dataflow_test_arguments_1 }} --job_name=benchmark-tests-pytorch-imagenet-python-101-${{env.NOW_UTC}} --output=gs://temp-storage-for-end-to-end-tests/torch/result_resnet101-${{env.NOW_UTC}}.txt' \
- name: run Pytorch Imagenet Classification with Resnet 152
uses: ./.github/actions/gradle-command-self-hosted-action
timeout-minutes: 180
with:
gradle-command: :sdks:python:apache_beam:testing:load_tests:run
arguments: |
-PloadTest.mainClass=apache_beam.testing.benchmarks.inference.pytorch_image_classification_benchmarks \
-Prunner=DataflowRunner \
-PpythonVersion=3.8 \
-PloadTest.requirementsTxtFile=apache_beam/ml/inference/torch_tests_requirements.txt \
'-PloadTest.args=${{ env.beam_Inference_Python_Benchmarks_Dataflow_test_arguments_2 }} --job_name=benchmark-tests-pytorch-imagenet-python-152-${{env.NOW_UTC}} --output=gs://temp-storage-for-end-to-end-tests/torch/result_resnet152-${{env.NOW_UTC}}.txt' \
- name: run Pytorch Language Modeling using Hugging face bert-base-uncased model
uses: ./.github/actions/gradle-command-self-hosted-action
timeout-minutes: 180
with:
gradle-command: :sdks:python:apache_beam:testing:load_tests:run
arguments: |
-PloadTest.mainClass=apache_beam.testing.benchmarks.inference.pytorch_language_modeling_benchmarks \
-Prunner=DataflowRunner \
-PpythonVersion=3.8 \
-PloadTest.requirementsTxtFile=apache_beam/ml/inference/torch_tests_requirements.txt \
'-PloadTest.args=${{ env.beam_Inference_Python_Benchmarks_Dataflow_test_arguments_3 }} --job_name=benchmark-tests-pytorch-language-modeling-bert-base-uncased-${{env.NOW_UTC}} --output=gs://temp-storage-for-end-to-end-tests/torch/result_bert_base_uncased-${{env.NOW_UTC}}.txt' \
- name: run Pytorch Langauge Modeling using Hugging Face bert-large-uncased model
uses: ./.github/actions/gradle-command-self-hosted-action
timeout-minutes: 180
with:
gradle-command: :sdks:python:apache_beam:testing:load_tests:run
arguments: |
-PloadTest.mainClass=apache_beam.testing.benchmarks.inference.pytorch_language_modeling_benchmarks \
-Prunner=DataflowRunner \
-PpythonVersion=3.8 \
-PloadTest.requirementsTxtFile=apache_beam/ml/inference/torch_tests_requirements.txt \
'-PloadTest.args=${{ env.beam_Inference_Python_Benchmarks_Dataflow_test_arguments_4 }} --job_name=benchmark-tests-pytorch-language-modeling-bert-large-uncased-${{env.NOW_UTC}} --output=gs://temp-storage-for-end-to-end-tests/torch/result_bert_large_uncased-${{env.NOW_UTC}}.txt' \
- name: run Pytorch Imagenet Classification with Resnet 152 with Tesla T4 GPU
uses: ./.github/actions/gradle-command-self-hosted-action
timeout-minutes: 180
with:
gradle-command: :sdks:python:apache_beam:testing:load_tests:run
arguments: |
-PloadTest.mainClass=apache_beam.testing.benchmarks.inference.pytorch_image_classification_benchmarks \
-Prunner=DataflowRunner \
-PpythonVersion=3.8 \
-PloadTest.requirementsTxtFile=apache_beam/ml/inference/torch_tests_requirements.txt \
'-PloadTest.args=${{ env.beam_Inference_Python_Benchmarks_Dataflow_test_arguments_5 }} --job_name=benchmark-tests-pytorch-imagenet-python-gpu-${{env.NOW_UTC}} --output=gs://temp-storage-for-end-to-end-tests/torch/result_resnet152_gpu-${{env.NOW_UTC}}.txt'
Original file line number Diff line number Diff line change
@@ -0,0 +1,34 @@
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

--region=us-central1
--machine_type=n1-standard-2
--num_workers=75
--disk_size_gb=50
--autoscaling_algorithm=NONE
--staging_location=gs://temp-storage-for-perf-tests/loadtests
--temp_location=gs://temp-storage-for-perf-tests/loadtests
--requirements_file=apache_beam/ml/inference/torch_tests_requirements.txt
--publish_to_big_query=true
--metrics_dataset=beam_run_inference
--metrics_table=torch_inference_imagenet_results_resnet152
--input_options={}
--influx_measurement=torch_inference_imagenet_resnet152
--pretrained_model_name=resnet152
--device=CPU
--input_file=gs://apache-beam-ml/testing/inputs/openimage_50k_benchmark.txt
--model_state_dict_path=gs://apache-beam-ml/models/torchvision.models.resnet152.pth
--runner=DataflowRunner
Original file line number Diff line number Diff line change
@@ -0,0 +1,36 @@
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

--region=us-central1
--machine_type=n1-standard-2
--num_workers=75
--disk_size_gb=50
--autoscaling_algorithm=NONE
--staging_location=gs://temp-storage-for-perf-tests/loadtests
--temp_location=gs://temp-storage-for-perf-tests/loadtests
--requirements_file=apache_beam/ml/inference/torch_tests_requirements.txt
--publish_to_big_query=true
--metrics_dataset=beam_run_inference
--metrics_table=torch_inference_imagenet_results_resnet152_tesla_t4
--input_options={}
--influx_measurement=torch_inference_imagenet_resnet152_tesla_t4
--pretrained_model_name=resnet152
--device=GPU
--experiments=worker_accelerator=type:nvidia-tesla-t4;count:1;install-nvidia-driver
--sdk_container_image=us.gcr.io/apache-beam-testing/python-postcommit-it/tensor_rt:latest
--input_file=gs://apache-beam-ml/testing/inputs/openimage_50k_benchmark.txt
--model_state_dict_path=gs://apache-beam-ml/models/torchvision.models.resnet152.pth
--runner=DataflowRunner
Original file line number Diff line number Diff line change
@@ -0,0 +1,34 @@
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

--region=us-central1
--machine_type=n1-standard-2
--num_workers=250
--disk_size_gb=50
--autoscaling_algorithm=NONE
--staging_location=gs://temp-storage-for-perf-tests/loadtests
--temp_location=gs://temp-storage-for-perf-tests/loadtests
--requirements_file=apache_beam/ml/inference/torch_tests_requirements.txt
--publish_to_big_query=true
--metrics_dataset=beam_run_inference
--metrics_table=torch_language_modeling_bert_base_uncased
--input_options={}
--influx_measurement=torch_language_modeling_bert_base_uncased
--device=CPU
--input_file=gs://apache-beam-ml/testing/inputs/sentences_50k.txt
--bert_tokenizer=bert-base-uncased
--model_state_dict_path=gs://apache-beam-ml/models/huggingface.BertForMaskedLM.bert-base-uncased.pth
--runner=DataflowRunner
Original file line number Diff line number Diff line change
@@ -0,0 +1,34 @@
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

--region=us-central1
--machine_type=n1-standard-2
--num_workers=250
--disk_size_gb=50
--autoscaling_algorithm=NONE
--staging_location=gs://temp-storage-for-perf-tests/loadtests
--temp_location=gs://temp-storage-for-perf-tests/loadtests
--requirements_file=apache_beam/ml/inference/torch_tests_requirements.txt
--publish_to_big_query=true
--metrics_dataset=beam_run_inference
--metrics_table=torch_language_modeling_bert_large_uncased
--input_options={}
--influx_measurement=torch_language_modeling_bert_large_uncased
--device=CPU
--input_file=gs://apache-beam-ml/testing/inputs/sentences_50k.txt
--bert_tokenizer=bert-large-uncased
--model_state_dict_path=gs://apache-beam-ml/models/huggingface.BertForMaskedLM.bert-large-uncased.pth
--runner=DataflowRunner
Original file line number Diff line number Diff line change
@@ -0,0 +1,34 @@
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

--region=us-central1
--machine_type=n1-standard-2
--num_workers=75
--disk_size_gb=50
--autoscaling_algorithm=NONE
--staging_location=gs://temp-storage-for-perf-tests/loadtests
--temp_location=gs://temp-storage-for-perf-tests/loadtests
--requirements_file=apache_beam/ml/inference/torch_tests_requirements.txt
--publish_to_big_query=true
--metrics_dataset=beam_run_inference
--metrics_table=torch_inference_imagenet_results_resnet101
--input_options={}
--influx_measurement=torch_inference_imagenet_resnet101
--pretrained_model_name=resnet101
--device=CPU
--input_file=gs://apache-beam-ml/testing/inputs/openimage_50k_benchmark.txt
--model_state_dict_path=gs://apache-beam-ml/models/torchvision.models.resnet101.pth
--runner=DataflowRunner

0 comments on commit 214f99a

Please sign in to comment.