E2E performance #130

Workflow file for this run

.github/workflows/e2e-performance.yml at 17f8800

	# End-to-end performance benchmark workflow.
	# The only trigger is a manual dispatch; every input below has a default.
	name: E2E performance

	on:
	  workflow_dispatch:
	    inputs:
	      # Exposed to the jobs as the INSTALL_IPEX environment variable.
	      install_ipex:
	        description: Install Intel PyTorch Extension
	        type: boolean
	        default: true
	      # "all" is expanded by the `setup` job into the full list of suites.
	      suite:
	        description: Test suite
	        type: choice
	        options:
	          - all
	          - huggingface
	          - timm_models
	          - torchbench
	        default: all
	      # "all" is expanded by the `setup` job into inference and training.
	      mode:
	        description: Inference/Training
	        type: choice
	        options:
	          - all
	          - inference
	          - training
	        default: all
	      # "all" is expanded by the `setup` job into every listed data type.
	      dtype:
	        description: Data type
	        type: choice
	        options:
	          - all
	          - amp_bf16
	          - amp_fp16
	          - bfloat16
	          - float16
	          - float32
	        default: all
	      # "subset" makes the benchmark step read the model list from
	      # .github/models/performance/{suite}.txt.
	      models:
	        description: Run all models or a subset
	        type: choice
	        options:
	          - all
	          - subset
	        default: subset
	      # When non-empty, takes precedence over `models`.
	      only_one_model:
	        description: Run only this one model
	        type: string
	        default: ""
	      # When empty, the benchmark job runs on the `max1550` runner.
	      runner_label:
	        description: Runner label, keep empty for default
	        type: string
	        default: ""
	      # Only the value "1" enables the debug output in the benchmark step.
	      TORCH_COMPILE_DEBUG:
	        description: TORCH_COMPILE_DEBUG
	        type: string
	        default: ""

	# The workflow token gets read-only access to every scope.
	permissions: read-all

	# Environment variables shared by all jobs and steps of this workflow.
	env:
	  # Quoted so that the value is an explicit string.
	  TRITON_DISABLE_LINE_INFO: "1"
	  USE_AOT_DEVLIST: pvc
	  # Must stay quoted: unquoted 3.10 would be read as the float 3.1.
	  PYTHON_VERSION: "3.10"
	  # Repository and branch checked out by the torchbench steps.
	  BENCHMARK_REPO: weishi-deng/benchmark.git
	  BENCHMARK_BRANCH: main
	  # Step conditions compare this value with the strings 'true' and 'false'.
	  INSTALL_IPEX: ${{ inputs.install_ipex }}
	  # Passed to the setup-pytorch action when IPEX is not installed.
	  TORCH_XPU_OPS_COMMIT: 39522db63ce045f52c9d61a286018c266cd00479

	jobs:
	  # Turns the dispatch inputs into the JSON arrays and the model selection
	  # consumed by the `build` job.
	  setup:
	    name: Setup
	    runs-on:
	      - glados
	      - spr
	      - cpu
	    outputs:
	      suite: ${{ steps.set-matrix.outputs.suite }}
	      mode: ${{ steps.set-matrix.outputs.mode }}
	      dtype: ${{ steps.set-matrix.outputs.dtype }}
	      models: ${{ steps.set-matrix.outputs.models }}
	    timeout-minutes: 10
	    steps:
	      - name: Set matrix
	        id: set-matrix
	        run: |
	          # An empty value or "all" selects every option of the input;
	          # anything else becomes a one-element JSON array.
	          if [[ -z "${{ inputs.suite }}" || "${{ inputs.suite }}" == "all" ]]; then
	            suite='["huggingface", "timm_models", "torchbench"]'
	          else
	            suite='["${{ inputs.suite }}"]'
	          fi
	          if [[ -z "${{ inputs.mode }}" || "${{ inputs.mode }}" == "all" ]]; then
	            mode='["inference", "training"]'
	          else
	            mode='["${{ inputs.mode }}"]'
	          fi
	          if [[ -z "${{ inputs.dtype }}" || "${{ inputs.dtype }}" == "all" ]]; then
	            dtype='["amp_bf16", "amp_fp16", "bfloat16", "float16", "float32"]'
	          else
	            dtype='["${{ inputs.dtype }}"]'
	          fi
	          # The model selection is a plain string, not a JSON array.
	          if [[ -z "${{ inputs.models }}" ]]; then
	            models="subset"
	          else
	            models="${{ inputs.models }}"
	          fi
	          echo "suite=$suite" >> "$GITHUB_OUTPUT"
	          echo "mode=$mode" >> "$GITHUB_OUTPUT"
	          echo "dtype=$dtype" >> "$GITHUB_OUTPUT"
	          echo "models=$models" >> "$GITHUB_OUTPUT"

	  # Logs the raw dispatch inputs and the values computed by `setup`.
	  print_inputs:
	    name: Print inputs
	    needs: setup
	    runs-on: Linux
	    steps:
	      # The JSON is handed over through the environment: pasting it inside a
	      # double-quoted shell string would let the JSON's own quotes end that
	      # string and expose the values to shell interpretation.
	      - name: Print inputs
	        env:
	          DISPATCH_INPUTS: ${{ toJSON(github.event.inputs) }}
	        run: |
	          echo "$DISPATCH_INPUTS"

	      - name: Print setup outputs
	        env:
	          SETUP_OUTPUTS: ${{ toJSON(needs.setup.outputs) }}
	        run: |
	          echo "$SETUP_OUTPUTS"

	  # Runs the benchmarks once per (suite, mode, dtype) combination.
	  build:
	    name: Test
	    needs: setup
	    runs-on:
	      # Falls back to `max1550` when no runner label was given.
	      - ${{ inputs.runner_label || 'max1550' }}
	    strategy:
	      matrix:
	        suite: ${{ fromJson(needs.setup.outputs.suite) }}
	        mode: ${{ fromJson(needs.setup.outputs.mode) }}
	        dtype: ${{ fromJson(needs.setup.outputs.dtype) }}
	      # One failing combination does not cancel the others.
	      fail-fast: false
	    timeout-minutes: 720
	    defaults:
	      run:
	        # Every `run` step sources the oneAPI environment before its script.
	        shell: bash -noprofile --norc -eo pipefail -c "source /home/runner/intel/oneapi/setvars.sh > /dev/null; source {0}"
	    steps:
	      - name: Checkout repository
	        uses: actions/checkout@v4

	      - name: Get benchmark commit id
	        uses: ./.github/actions/get-commit-id
	        with:
	          repository: ${{ env.BENCHMARK_REPO }}
	          branch: ${{ env.BENCHMARK_BRANCH }}
	          variable: BENCHMARK_COMMIT_ID

	      - name: Load pip cache
	        id: pip-cache
	        uses: ./.github/actions/load
	        with:
	          path: $HOME/.cache/pip
	          # pip cache per commit id just to minimize network traffic
	          key: pip-$PYTHON_VERSION-$GITHUB_SHA

	      - name: Install Python
	        uses: actions/setup-python@v5
	        with:
	          python-version: ${{ env.PYTHON_VERSION }}

	      - name: Install Python build dependencies
	        run: |
	          pip install wheel

	      # Exactly one of each "with IPEX" / "without IPEX" pair below runs,
	      # depending on the `install_ipex` input.
	      - name: Setup PyTorch
	        if: ${{ env.INSTALL_IPEX == 'true' }}
	        uses: ./.github/actions/setup-pytorch

	      - name: Setup PyTorch without IPEX
	        if: ${{ env.INSTALL_IPEX == 'false' }}
	        uses: ./.github/actions/setup-pytorch
	        with:
	          repository: pytorch/pytorch
	          torch_xpu_ops_commit: ${{ env.TORCH_XPU_OPS_COMMIT }}

	      - name: Install latest nightly wheels including IPEX
	        if: ${{ env.INSTALL_IPEX == 'true' }}
	        uses: ./.github/actions/install-wheels
	        with:
	          gh_token: ${{ secrets.GITHUB_TOKEN }}
	          python_version: ${{ env.PYTHON_VERSION }}

	      - name: Install latest nightly wheels excluding IPEX
	        if: ${{ env.INSTALL_IPEX == 'false' }}
	        uses: ./.github/actions/install-wheels
	        with:
	          gh_token: ${{ secrets.GITHUB_TOKEN }}
	          python_version: ${{ env.PYTHON_VERSION }}
	          # Single-quoted because the value starts with `!`.
	          # NOTE(review): the pattern contains no `*` wildcards as written;
	          # confirm against what the install-wheels action expects.
	          wheels_pattern: '!(intel_extension_for_pytorch-|torch-).whl'

	      - name: Setup fake IPEX
	        if: ${{ env.INSTALL_IPEX == 'false' }}
	        uses: ./.github/actions/setup-fake-ipex

	      # Exports the versions pinned in the pytorch checkout for later steps.
	      - name: Identify pinned versions
	        run: |
	          cd pytorch
	          echo "TRANSFORMERS_VERSION=$(<.ci/docker/ci_commit_pins/huggingface.txt)" >> "${GITHUB_ENV}"
	          echo "TIMM_COMMIT_ID=$(<.ci/docker/ci_commit_pins/timm.txt)" >> "${GITHUB_ENV}"

	      # TIMM depends on torch and torchvision, so, in general, it needs to be installed before
	      # installing custom torch and torchvision, but instead we install its dependencies except
	      # torch and torchvision.
	      - name: Install TIMM Models
	        if: ${{ matrix.suite == 'timm_models' }}
	        run: |
	          # install timm without dependencies
	          pip install --no-deps git+https://github.com/huggingface/pytorch-image-models@$TIMM_COMMIT_ID
	          # install timm dependencies without torch and torchvision
	          pip install $(curl -sSL https://raw.githubusercontent.com/huggingface/pytorch-image-models/$TIMM_COMMIT_ID/requirements.txt | grep -vE torch)

	      # The cache key is a hash of the Python version and the commit SHA.
	      - name: Generate Triton cache key
	        id: triton-key
	        run: |
	          # sha256sum prints "<hash>  -"; keep only the hash field.
	          COMPOSITE_KEY=$(echo $PYTHON_VERSION $GITHUB_SHA | sha256sum - | cut -d' ' -f1)
	          echo "key=triton-$COMPOSITE_KEY" >> "$GITHUB_OUTPUT"

	      - name: Load Triton wheels from a cache
	        id: triton-cache
	        uses: ./.github/actions/load
	        with:
	          path: python/dist
	          key: ${{ steps.triton-key.outputs.key }}

	      # The wheels are built and saved only when the cache lookup missed.
	      - name: Build Triton wheels
	        if: ${{ steps.triton-cache.outputs.status == 'miss' }}
	        uses: ./.github/actions/setup-triton
	        with:
	          command: DEBUG=1 python setup.py bdist_wheel

	      - name: Install Triton
	        run: |
	          pip install python/dist/*.whl

	      - name: Save Triton wheels to a cache
	        if: ${{ steps.triton-cache.outputs.status == 'miss' }}
	        uses: ./.github/actions/save
	        with:
	          path: ${{ steps.triton-cache.outputs.path }}
	          dest: ${{ steps.triton-cache.outputs.dest }}

	      - name: Install python test dependencies
	        run: |
	          pip install pyyaml pandas scipy numpy psutil pyre_extensions torchrec

	      - name: Install transformers package
	        if: ${{ matrix.suite == 'huggingface' }}
	        uses: ./.github/actions/install-dependency
	        with:
	          package: transformers
	          repository: huggingface/transformers
	          ref: ${{ env.TRANSFORMERS_VERSION }}
	          try-tag-prefix: v

	      - name: Clone pytorch benchmark
	        if: ${{ matrix.suite == 'torchbench' }}
	        uses: actions/checkout@v4
	        with:
	          repository: ${{ env.BENCHMARK_REPO }}
	          ref: ${{ env.BENCHMARK_BRANCH }}
	          submodules: recursive
	          path: benchmark

	      - name: Install pytorch benchmark
	        if: ${{ matrix.suite == 'torchbench' }}
	        run: |
	          cd benchmark
	          python install.py
	          pip install -e .

	      - name: Run e2e performance benchmarks
	        env:
	          HUGGING_FACE_HUB_TOKEN: ${{ secrets.HUGGING_FACE_HUB_TOKEN }}
	          # Free-form inputs go through the environment so that their text
	          # is never spliced into the script itself.
	          INPUT_TORCH_COMPILE_DEBUG: ${{ inputs.TORCH_COMPILE_DEBUG }}
	          INPUT_ONLY_ONE_MODEL: ${{ inputs.only_one_model }}
	        run: |
	          export WORKSPACE=$GITHUB_WORKSPACE

	          if [[ "$INPUT_TORCH_COMPILE_DEBUG" == "1" ]]; then
	            export TORCH_COMPILE_DEBUG="1"
	            # torch will save debug logs to $TORCH_COMPILE_DEBUG_DIR/torch_compile_debug
	            export TORCH_COMPILE_DEBUG_DIR=$GITHUB_WORKSPACE
	          fi

	          cd pytorch

	          # Common invocation; an optional model name is appended as the last argument.
	          run_test() {
	            "$GITHUB_WORKSPACE/scripts/inductor_xpu_test.sh" ${{ matrix.suite }} ${{ matrix.dtype }} ${{ matrix.mode }} performance xpu 0 static 1 0 "$@"
	          }

	          # if "only_one_model" is set, then test this model
	          # if "models" == "subset", then test the models from .github/models/performance/{suite}.txt
	          # otherwise test all models

	          if [[ -n "$INPUT_ONLY_ONE_MODEL" ]]; then
	            run_test "$INPUT_ONLY_ONE_MODEL"
	          elif [[ "${{ needs.setup.outputs.models }}" == "subset" ]]; then
	            # The list is read from fd 3 so that the test script does not
	            # receive it on stdin. `|| [[ -n ... ]]` keeps a final line that
	            # has no trailing newline.
	            while IFS= read -r -u 3 model || [[ -n "$model" ]]; do
	              # A blank line must not turn into the "all models" invocation.
	              [[ -n "$model" ]] || continue
	              run_test "$model"
	            done 3< "$GITHUB_WORKSPACE/.github/models/performance/${{ matrix.suite }}.txt"
	          else
	            run_test
	          fi

	      # Writes the run metadata to inductor_log/.env and echoes it to the log.
	      # Variables that this file does not set are presumably provided by the
	      # earlier setup actions or by capture-hw-details.sh - TODO confirm.
	      - name: Report environment details
	        run: |
	          mkdir -p inductor_log
	          TIMESTAMP=$(date '+%Y%m%d%H%M%S')
	          echo "TIMESTAMP=$TIMESTAMP" >> "${GITHUB_ENV}"

	          source ./scripts/capture-hw-details.sh --quiet

	          cat <<EOF | tee inductor_log/.env
	          TIMESTAMP=$TIMESTAMP
	          JOB_NAME=${{ join(matrix.*, '-') }}
	          GITHUB_RUN_ID=$GITHUB_RUN_ID
	          GITHUB_RUN_NUMBER=$GITHUB_RUN_NUMBER
	          GITHUB_RUN_ATTEMPT=$GITHUB_RUN_ATTEMPT
	          PYTHON_VERSION=$PYTHON_VERSION
	          PYTORCH_REPO=$PYTORCH_REPO
	          PYTORCH_COMMIT_ID=$PYTORCH_COMMIT_ID
	          IPEX_REPO=$IPEX_REPO
	          IPEX_COMMIT_ID=$IPEX_COMMIT_ID
	          LLVM_REPO=llvm/llvm-project
	          LLVM_COMMIT_ID=$LLVM_COMMIT_ID
	          BENCHMARK_REPO=$BENCHMARK_REPO
	          BENCHMARK_COMMIT_ID=$BENCHMARK_COMMIT_ID
	          TRITON_REPO=$GITHUB_REPOSITORY
	          TRITON_COMMIT_ID=$GITHUB_SHA
	          TORCHVISION_COMMIT_ID=$TORCHVISION_COMMIT_ID
	          TORCHTEXT_COMMIT_ID=$TORCHTEXT_COMMIT_ID
	          TORCHAUDIO_COMMIT_ID=$TORCHAUDIO_COMMIT_ID
	          TRANSFORMERS_VERSION=$TRANSFORMERS_VERSION
	          TIMM_COMMIT_ID=$TIMM_COMMIT_ID
	          LIBIGC1_VERSION=$LIBIGC1_VERSION
	          LEVEL_ZERO_VERSION=$LEVEL_ZERO_VERSION
	          GPU_DEVICE=$GPU_DEVICE
	          AGAMA_VERSION=$AGAMA_VERSION
	          EOF

	      # Adds the torch_compile_debug directory, when present, to the uploaded logs.
	      - name: Copy reports
	        run: |
	          if [[ -d torch_compile_debug ]]; then
	            cp -rT torch_compile_debug inductor_log
	          fi

	      - name: Upload test logs
	        uses: actions/upload-artifact@v4
	        with:
	          name: logs-${{ join(matrix.*, '-') }}
	          path: inductor_log

	      - name: Save pip cache
	        if: ${{ steps.pip-cache.outputs.status == 'miss' }}
	        uses: ./.github/actions/save
	        with:
	          path: ${{ steps.pip-cache.outputs.path }}
	          dest: ${{ steps.pip-cache.outputs.dest }}

Provide feedback

Saved searches

Use saved searches to filter your results more quickly

E2E performance #130

Workflow file

E2E performance #130

Jobs

Run details

Workflow file for this run