Skip to content

Commit

Permalink
Merge branch 'main' into log2
Browse files Browse the repository at this point in the history
  • Loading branch information
fengyuan14 committed Jul 8, 2024
2 parents 8f27bad + 441a56c commit 63eaad9
Show file tree
Hide file tree
Showing 75 changed files with 8,060 additions and 647 deletions.
13 changes: 5 additions & 8 deletions .github/actions/inductor-xpu-e2e-test/action.yml
Original file line number Diff line number Diff line change
Expand Up @@ -110,14 +110,11 @@ runs:
contains "accuracy,performance" $scenario
$contains_status
if [ "${MODEL_ONLY_NAME}" == "" ];then
bash inductor_xpu_test.sh ${suite} ${dt} ${mode} ${scenario} xpu 0 static 8 0 &
bash inductor_xpu_test.sh ${suite} ${dt} ${mode} ${scenario} xpu 1 static 8 1 &
bash inductor_xpu_test.sh ${suite} ${dt} ${mode} ${scenario} xpu 2 static 8 2 &
bash inductor_xpu_test.sh ${suite} ${dt} ${mode} ${scenario} xpu 3 static 8 3 &
bash inductor_xpu_test.sh ${suite} ${dt} ${mode} ${scenario} xpu 4 static 8 4 &
bash inductor_xpu_test.sh ${suite} ${dt} ${mode} ${scenario} xpu 5 static 8 5 &
bash inductor_xpu_test.sh ${suite} ${dt} ${mode} ${scenario} xpu 6 static 8 6 &
bash inductor_xpu_test.sh ${suite} ${dt} ${mode} ${scenario} xpu 7 static 8 7 &
xpu_list=($(xpu-smi discovery |grep 'DRM Device: /dev/' |sed 's/.*card//;s/[^0-9].*//' |awk '{print $1 - 1":"NR - 1}'))
for xpu_id in ${xpu_list[*]}
do
bash inductor_xpu_test.sh ${suite} ${dt} ${mode} ${scenario} xpu ${xpu_id/:*} static ${#xpu_list[*]} ${xpu_id/*:} &
done
else
bash inductor_xpu_test.sh ${suite} ${dt} ${mode} ${scenario} xpu 0 static 1 0 ${MODEL_ONLY_NAME} &
fi
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -37,7 +37,7 @@ mobilevit_s,pass,pass,pass,pass,pass
nfnet_l0,pass,pass,pass,pass,pass
pit_b_224,pass,pass,pass,pass,pass
pnasnet5large,pass,pass,pass,pass,pass
poolformer_m36,pass,pass,fail_accuracy,pass,pass
poolformer_m36,pass,pass,pass,pass,pass
regnety_002,pass,pass,pass,pass,pass
repvgg_a2,pass,pass,pass,pass,pass
res2net101_26w_4s,pass,pass,pass,pass,pass
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -36,8 +36,8 @@ mobilenetv3_large_100,pass,pass,fail_accuracy,pass,pass
mobilevit_s,pass,pass,fail_accuracy,pass,pass
nfnet_l0,pass,pass,pass,pass,pass
pit_b_224,pass,pass,pass,pass,pass
pnasnet5large,pass,pass,fail_accuracy,pass,fail_accuracy
poolformer_m36,pass,pass,fail_accuracy,pass,pass
pnasnet5large,pass,pass,pass,pass,fail_accuracy
poolformer_m36,pass,pass,pass,pass,pass
regnety_002,pass,pass,fail_accuracy,pass,pass
repvgg_a2,pass,pass,fail_accuracy,pass,pass
res2net101_26w_4s,pass,pass,fail_accuracy,pass,pass
Expand Down
14 changes: 7 additions & 7 deletions .github/ci_expected_accuracy/inductor_torchbench_inference.csv
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@ Background_Matting,pass_due_to_skip,pass_due_to_skip,eager_fail_to_run,pass_due_
DALLE2_pytorch,eager_fail_to_run,eager_fail_to_run,eager_fail_to_run,eager_fail_to_run,eager_fail_to_run
LearningToPaint,pass,pass,pass,pass,pass
Super_SloMo,pass,pass,pass,pass,pass
alexnet,eager_two_runs_differ,pass,eager_two_runs_differ,pass,eager_two_runs_differ
alexnet,eager_two_runs_differ,pass,pass,pass,eager_two_runs_differ
basic_gnn_edgecnn,pass,pass,pass,pass,pass
basic_gnn_gcn,pass,pass,pass,pass,pass
basic_gnn_gin,pass,pass,pass,pass,pass
Expand All @@ -20,11 +20,11 @@ detectron2_fasterrcnn_r_101_fpn,pass,eager_fail_to_run,fail_accuracy,eager_fail_
detectron2_fasterrcnn_r_50_c4,pass,eager_fail_to_run,fail_accuracy,eager_fail_to_run,fail_accuracy
detectron2_fasterrcnn_r_50_dc5,pass,eager_fail_to_run,fail_accuracy,eager_fail_to_run,fail_accuracy
detectron2_fasterrcnn_r_50_fpn,pass,eager_fail_to_run,fail_accuracy,eager_fail_to_run,fail_accuracy
detectron2_fcos_r_50_fpn,pass,fail_accuracy,fail_accuracy,pass,fail_accuracy
detectron2_fcos_r_50_fpn,pass,pass,pass,pass,pass
detectron2_maskrcnn,fail_accuracy,eager_fail_to_run,eager_fail_to_run,eager_fail_to_run,eager_fail_to_run
detectron2_maskrcnn_r_101_c4,fail_accuracy,eager_fail_to_run,eager_fail_to_run,eager_fail_to_run,eager_fail_to_run
detectron2_maskrcnn_r_101_fpn,fail_accuracy,eager_fail_to_run,eager_fail_to_run,eager_fail_to_run,eager_fail_to_run
detectron2_maskrcnn_r_50_c4,fail_accuracy,eager_fail_to_run,eager_fail_to_run,eager_fail_to_run,eager_fail_to_run
detectron2_maskrcnn_r_50_c4,pass,eager_fail_to_run,eager_fail_to_run,eager_fail_to_run,eager_fail_to_run
detectron2_maskrcnn_r_50_fpn,fail_accuracy,eager_fail_to_run,eager_fail_to_run,eager_fail_to_run,eager_fail_to_run
dlrm,pass,pass,pass,pass,pass
doctr_det_predictor,pass,pass,pass,eager_fail_to_run,pass
Expand Down Expand Up @@ -61,7 +61,7 @@ mnasnet1_0,pass,pass,pass,pass,pass
mobilenet_v2,pass,pass,pass,pass,pass
mobilenet_v2_quantized_qat,model_fail_to_load,model_fail_to_load,model_fail_to_load,model_fail_to_load,model_fail_to_load
mobilenet_v3_large,pass,pass,pass,pass,pass
moco,eager_fail_to_run,eager_fail_to_run,eager_fail_to_run,eager_fail_to_run,eager_fail_to_run
moco,model_fail_to_load,model_fail_to_load,model_fail_to_load,eager_fail_to_run,model_fail_to_load
moondream,pass,pass,pass,pass,pass
nanogpt,pass,pass,pass,pass,pass
nvidia_deeprecommender,pass,pass,pass,pass,pass
Expand Down Expand Up @@ -89,7 +89,7 @@ speech_transformer,pass,pass,pass,pass,pass
squeezenet1_1,pass,fail_accuracy,fail_accuracy,pass,pass
stable_diffusion_text_encoder,pass,pass,pass,pass,pass
stable_diffusion_unet,pass_due_to_skip,pass_due_to_skip,pass_due_to_skip,pass_due_to_skip,pass_due_to_skip
tacotron2,pass,pass,pass,model_fail_to_load,model_fail_to_load
tacotron2,pass,pass,pass,model_fail_to_load,fail_to_run
timm_efficientdet,model_fail_to_load,model_fail_to_load,model_fail_to_load,model_fail_to_load,model_fail_to_load
timm_efficientnet,pass,pass,pass,pass,pass
timm_nfnet,pass,pass,pass,pass,pass
Expand All @@ -98,8 +98,8 @@ timm_resnest,pass,pass,pass,pass,pass
timm_vision_transformer,pass,pass,pass,pass,pass
timm_vision_transformer_large,pass_due_to_skip,pass_due_to_skip,pass_due_to_skip,pass_due_to_skip,pass_due_to_skip
timm_vovnet,pass,pass,pass,pass,pass
torch_multimodal_clip,eager_fail_to_run,eager_fail_to_run,eager_fail_to_run,eager_fail_to_run,eager_fail_to_run
torch_multimodal_clip,pass,pass,pass,eager_fail_to_run,eager_fail_to_run
tts_angular,pass,eager_fail_to_run,eager_fail_to_run,eager_fail_to_run,eager_fail_to_run
vgg16,eager_two_runs_differ,pass,eager_two_runs_differ,pass,pass
vgg16,eager_two_runs_differ,pass,pass,pass,pass
vision_maskrcnn,pass,pass,pass,eager_fail_to_run,eager_fail_to_run
yolov3,pass,pass,pass,pass,pass
10 changes: 5 additions & 5 deletions .github/ci_expected_accuracy/inductor_torchbench_training.csv
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
name,float32,bfloat16,float16,amp_bf16,amp_fp16
torchrec_dlrm,fail_to_run,eager_fail_to_run,eager_fail_to_run,fail_to_run,fail_to_run
torchrec_dlrm,eager_fail_to_run,eager_fail_to_run,eager_fail_to_run,eager_fail_to_run,fail_to_run
BERT_pytorch,pass,pass,pass,pass,pass
Background_Matting,pass_due_to_skip,pass_due_to_skip,eager_fail_to_run,pass_due_to_skip,eager_fail_to_run
DALLE2_pytorch,eager_fail_to_run,eager_fail_to_run,eager_fail_to_run,eager_fail_to_run,eager_fail_to_run
Expand Down Expand Up @@ -53,15 +53,15 @@ hf_distil_whisper,model_fail_to_load,model_fail_to_load,model_fail_to_load,model
lennard_jones,pass,pass,pass,pass,pass
llama,pass,pass,pass,pass,pass
llama_v2_7b_16h,pass_due_to_skip,pass_due_to_skip,pass_due_to_skip,pass_due_to_skip,pass_due_to_skip
llava,eager_fail_to_run,eager_2nd_run_fail,eager_2nd_run_fail,eager_fail_to_run,eager_fail_to_run
llava,eager_fail_to_run,eager_fail_to_run,eager_fail_to_run,eager_fail_to_run,eager_fail_to_run
maml,eager_fail_to_run,eager_fail_to_run,eager_fail_to_run,eager_fail_to_run,eager_fail_to_run
maml_omniglot,pass,pass,pass,pass,pass
microbench_unbacked_tolist_sum,pass,pass,pass,pass,pass
mnasnet1_0,pass,pass,pass,pass,pass
mobilenet_v2,pass,pass,pass,pass,pass
mobilenet_v2_quantized_qat,fail_accuracy,eager_fail_to_run,eager_fail_to_run,eager_fail_to_run,eager_fail_to_run
mobilenet_v3_large,pass,pass,pass,pass,pass
moco,eager_fail_to_run,eager_fail_to_run,eager_fail_to_run,eager_fail_to_run,eager_fail_to_run
moco,model_fail_to_load,model_fail_to_load,model_fail_to_load,model_fail_to_load,eager_fail_to_run
moondream,pass,pass,pass,pass,pass
nanogpt,pass,pass,pass,pass,pass
nvidia_deeprecommender,pass,pass,pass,pass,pass
Expand Down Expand Up @@ -91,14 +91,14 @@ stable_diffusion_text_encoder,pass,pass,pass,pass,pass
stable_diffusion_unet,pass_due_to_skip,pass_due_to_skip,pass_due_to_skip,pass_due_to_skip,pass_due_to_skip
tacotron2,fail_to_run,fail_to_run,fail_to_run,fail_to_run,fail_to_run
timm_efficientdet,model_fail_to_load,model_fail_to_load,model_fail_to_load,model_fail_to_load,model_fail_to_load
timm_efficientnet,pass,pass,pass,fail_accuracy,pass
timm_efficientnet,pass,pass,pass,pass,pass
timm_nfnet,pass,pass,pass,pass,pass
timm_regnet,pass,pass,pass,pass,pass
timm_resnest,pass,pass,pass,pass,pass
timm_vision_transformer,pass,pass,pass,pass,pass
timm_vision_transformer_large,pass_due_to_skip,pass_due_to_skip,pass_due_to_skip,pass_due_to_skip,pass_due_to_skip
timm_vovnet,pass,pass,pass,pass,pass
torch_multimodal_clip,eager_fail_to_run,eager_fail_to_run,eager_fail_to_run,eager_fail_to_run,eager_fail_to_run
torch_multimodal_clip,pass,pass,pass,eager_fail_to_run,eager_fail_to_run
tts_angular,pass,eager_fail_to_run,eager_fail_to_run,eager_fail_to_run,eager_fail_to_run
vgg16,eager_two_runs_differ,eager_two_runs_differ,eager_two_runs_differ,eager_two_runs_differ,eager_two_runs_differ
vision_maskrcnn,pass,pass,pass,eager_fail_to_run,eager_fail_to_run
Expand Down
2 changes: 2 additions & 0 deletions .github/scripts/apply_torch_pr.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,8 @@
"https://github.com/pytorch/pytorch/pull/127277",
# [Inductor][Intel GPU] Support reduction split.
"https://github.com/pytorch/pytorch/pull/129120",
# Modify the tolerance level in TIMM benchmark
"https://github.com/pytorch/pytorch/pull/129735",
]
)
parser.add_argument('--extra-pr-list', '-e', nargs='+',default=[])
Expand Down
2 changes: 1 addition & 1 deletion .github/scripts/inductor_xpu_test.sh
Original file line number Diff line number Diff line change
Expand Up @@ -61,5 +61,5 @@ fi
ulimit -n 1048576
ZE_AFFINITY_MASK=${CARD} \
python benchmarks/dynamo/${SUITE}.py --${SCENARIO} --${Real_DT} -d ${DEVICE} -n10 --no-skip --dashboard \
${DT_extra} ${Mode_extra} ${Shape_extra} ${partition_flags} ${Model_only_extra} --backend=inductor --timeout=7200 \
${DT_extra} ${Mode_extra} ${Shape_extra} ${partition_flags} ${Model_only_extra} --backend=inductor --timeout=10800 \
--output=${LOG_DIR}/${LOG_NAME}.csv 2>&1 | tee ${LOG_DIR}/${LOG_NAME}_card${CARD}.log
123 changes: 123 additions & 0 deletions .github/workflows/_linux_ut.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,123 @@
# Reusable workflow (triggered only through workflow_call) with a single job
# that checks out this repository, prepares and builds PyTorch for XPU, and
# runs the unit-test suites selected by the ut_suite input.
name: inductor-xpu-ut-test

on:
workflow_call:
inputs:
# String flag, not a boolean: later steps compare it against 'true'.
torch_xpu_ops_update:
required: false
type: string
default: 'true'
description: True means update xpu_ops when building pytorch, otherwise means not
# Comma-separated suite names; each test step is gated with contains() on
# this string.
# NOTE(review): declared required yet also carries a default - confirm
# which of the two is intended.
ut_suite:
required: true
type: string
default: 'op_example,op_extended,op_ut,torch_xpu'
description: op_example,op_extended,op_ut,torch_xpu. Delimiter is comma
pytorch_branch:
required: false
type: string
default: 'main'
description: Set pytorch branch
# NOTE(review): same required-plus-default combination as ut_suite.
runner:
required: true
type: string
default: 'linux.idc.xpu'
description: Set runner


jobs:
Inductor-XPU-UT-Tests:
runs-on: ${{ inputs.runner }}
# Upper bound for the whole job, in minutes (900 min = 15 h).
timeout-minutes: 900
steps:
- name: Checkout torch-xpu-ops
uses: actions/checkout@v4
# Clones the requested PyTorch branch into ../pytorch and, unless the
# torch_xpu_ops_update input says otherwise, replaces its
# third_party/torch-xpu-ops with the sibling ../torch-xpu-ops directory
# (presumably this repository's checkout - confirm the workspace name).
- name: Prepare Stock Pytorch
run: |
pwd
# Always start from a fresh clone: drop any pytorch tree left by an earlier run.
cd ../ && rm -rf pytorch
git clone -b ${{ inputs.pytorch_branch }} https://github.com/pytorch/pytorch
cd pytorch && git log -n 1 && git submodule sync && git submodule update --init --recursive
# The input is substituted unquoted, so an empty value leaves a bare
# "[ -z ]", which evaluates true: empty is handled the same as 'true'.
if [ -z ${{ inputs.torch_xpu_ops_update }} ]; then
rm -rf third_party/torch-xpu-ops && cp -r ../torch-xpu-ops third_party/
else
if [[ ${{ inputs.torch_xpu_ops_update }} == 'true' ]]; then
rm -rf third_party/torch-xpu-ops && cp -r ../torch-xpu-ops third_party/
else
echo "Not update torch-xpu-ops"
fi
fi
# Workaround for torch-xpu-ops ci test
# Rewrites the "checkout --quiet ${TORCH_XPU_OPS_COMMIT}" text in
# caffe2/CMakeLists.txt to "log -n 1"; presumably this stops the build from
# switching the copied sources to a pinned commit - TODO confirm.
sed -i "s/checkout --quiet \${TORCH_XPU_OPS_COMMIT}/log -n 1/g" caffe2/CMakeLists.txt
# Recreates a conda environment, builds a PyTorch wheel with USE_XPU=1 from
# ../pytorch, installs it, then builds torchvision from source and installs
# the CI Python requirements.
- name: Build Pytorch XPU
run: |
which conda && conda clean -ay
# The environment name embeds ZE_AFFINITY_MASK, which this workflow never
# sets - presumably it comes from the runner's environment (TODO confirm).
# If "conda remove" fails, fall back to deleting the env directory directly.
conda remove --all -y -n xpu_op_${ZE_AFFINITY_MASK} || \
rm -rf $(dirname ${CONDA_EXE})/../envs/xpu_op_${ZE_AFFINITY_MASK}
conda create -n xpu_op_${ZE_AFFINITY_MASK} python=3.10 cmake ninja -y
source activate xpu_op_${ZE_AFFINITY_MASK}
conda install -c intel mkl-static mkl-include -y
cd ../pytorch
pip install -r requirements.txt
export USE_XPU=1
# Load the oneAPI compiler environment before building.
source /opt/intel/oneapi/compiler/latest/env/vars.sh
# Use the active conda prefix; if CONDA_PREFIX is unset or empty, fall back
# to the parent of the directory that contains the conda executable.
export CMAKE_PREFIX_PATH=${CONDA_PREFIX:-"$(dirname $(which conda))/../"}
python setup.py bdist_wheel
pip install --force-reinstall dist/*.whl
# torchvision is cloned inside the pytorch tree and installed from source.
git clone https://github.com/pytorch/vision && cd vision && python setup.py install && cd ..
pip install -r .ci/docker/requirements-ci.txt
# Runs pytest over the examples directory of the workspace; executed only
# when ut_suite contains 'op_example'.
- name: Run XPU OP Examples
if: contains(inputs.ut_suite, 'op_example')
run: |
cd ${{ github.workspace }}
mkdir -p ut_log
# Print the devices reported by xpu-smi into the job log.
xpu-smi discovery
source /opt/intel/oneapi/compiler/latest/env/vars.sh
source activate xpu_op_${ZE_AFFINITY_MASK}
cd ${{ github.workspace }}
cd examples
pip install pytest
# timeout takes seconds: abort the run after 8000 s.
timeout 8000 pytest -v
# Runs run_test_with_skip.py from the extended test directory of the
# torch-xpu-ops copy inside ../pytorch; executed only when ut_suite contains
# 'op_extended'.
- name: Run XPU OP Extended UT
if: contains(inputs.ut_suite, 'op_extended')
run: |
source /opt/intel/oneapi/compiler/latest/env/vars.sh
source activate xpu_op_${ZE_AFFINITY_MASK}
# Presumably enables tests marked as slow in the PyTorch test framework -
# TODO confirm.
export PYTORCH_TEST_WITH_SLOW=1
cd ../pytorch/third_party/torch-xpu-ops/test/xpu/extended/
# Abort after 10000 s.
timeout 10000 python run_test_with_skip.py
# Runs the skip-list and only-list test drivers in test/xpu of the
# torch-xpu-ops copy inside ../pytorch; executed only when ut_suite contains
# 'op_ut'.
- name: Run XPU OP UT
if: contains(inputs.ut_suite, 'op_ut')
run: |
source /opt/intel/oneapi/compiler/latest/env/vars.sh
source activate xpu_op_${ZE_AFFINITY_MASK}
export PYTORCH_ENABLE_XPU_FALLBACK=1
export PYTORCH_TEST_WITH_SLOW=1
cd ../pytorch/third_party/torch-xpu-ops/test/xpu
# Each driver is limited to 10000 s.
timeout 10000 python run_test_with_skip.py
# Cases run with an on-demand white list, since some suites are too
# slow to go through all operators on CPU. So cases are added on demand
# once the XPU implementation is done.
# test_foreach, test_decomp
timeout 10000 python run_test_with_only.py
# Runs the built XPU/SYCL test binaries from build/bin with XML reports, then
# the Python tests through test/run_test.py --xpu; executed only when
# ut_suite contains 'torch_xpu'.
- name: Run Torch XPU UT
if: contains(inputs.ut_suite, 'torch_xpu')
run: |
source /opt/intel/oneapi/compiler/latest/env/vars.sh
source activate xpu_op_${ZE_AFFINITY_MASK}
cd ../pytorch
# Recreate the report directory so it holds only this run's results.
TEST_REPORTS_DIR=$(pwd)/test/test-reports
rm -rf "$TEST_REPORTS_DIR" && mkdir -p "$TEST_REPORTS_DIR"
# Run Pytorch XPU binary UT
# The brace expansion yields two globs (*xpu* and *sycl*). A glob that
# matches nothing is left as the literal pattern (assuming nullglob is off),
# so entries still containing '*' are skipped, as are .so and .a libraries.
for xpu_case in build/bin/*{xpu,sycl}*; do
if [[ "$xpu_case" != *"*"* && "$xpu_case" != *.so && "$xpu_case" != *.a ]]; then
case_name=$(basename "$xpu_case")
echo "Testing ${case_name} ..."
# One XML report per binary, named after the binary.
"$xpu_case" --gtest_output=xml:"$TEST_REPORTS_DIR"/"$case_name".xml
fi
done
# Run Pytorch XPU python UT
export PYTORCH_ENABLE_XPU_FALLBACK=1
# Patch run_test.py in place: the line assigning selected_tests from
# exclude_tests(XPU_BLOCKLIST...) is replaced so that selected_tests is
# simply XPU_TEST.
sed -i 's/selected_tests = exclude_tests(XPU_BLOCKLIST.*/selected_tests = XPU_TEST/g' ./test/run_test.py
python test/run_test.py --xpu
2 changes: 1 addition & 1 deletion .github/workflows/inductor_xpu_e2e_ci.yml
Original file line number Diff line number Diff line change
Expand Up @@ -126,7 +126,7 @@ jobs:
cp -r ${{ github.workspace }}/../pytorch/inductor_log ${{ github.workspace }}/upload_files
failed_case=$(grep "Real failed: models: *[1-9]" ${{ github.workspace }}/upload_files/summary_accuracy.log |wc -l || true)
if [ ${failed_case} -ne 0 ];then
grep -E "Failed: [1-9]|Summary for" ${{ github.workspace }}/summary_accuracy.log
grep -E "Real failed: models: [1-9]|Summary for" ${{ github.workspace }}/summary_accuracy.log
exit 1
fi
- name: Upload Inductor XPU E2E Data
Expand Down
Loading

0 comments on commit 63eaad9

Please sign in to comment.