Merge branch 'main' into meng_ut
chunhuanMeng authored Jan 3, 2025
2 parents 9df808a + f634c3c commit 47ac81a
Showing 26 changed files with 500 additions and 55 deletions.
4 changes: 2 additions & 2 deletions .github/ci_expected_accuracy/check_expected.py
@@ -16,14 +16,14 @@


# load csv files
- test_data= pd.read_csv(args.csv_file)
+ test_data= pd.read_csv(args.csv_file, comment='#')
# test_data = test_data.reset_index() # make sure indexes pair with number of rows
# test_data = test_data.sort_values(by=["name"], ascending=True)
test_names = [row["name"] for index, row in test_data.iterrows()]

current_path = pathlib.Path(__file__).parent.resolve()
refer_file = str(current_path) + "/" + args.category + "_" + args.suite + "_" + args.mode + ".csv"
- refer_data= pd.read_csv(refer_file)
+ refer_data= pd.read_csv(refer_file, comment='#')
# refer_data = refer_data.reset_index() # make sure indexes pair with number of rows
# refer_data = refer_data.sort_values(by=["name"], ascending=True)
refer_names = [row["name"] for index, row in refer_data.iterrows()]
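
The new comment='#' argument tells pandas to skip CSV lines that start with '#', which is what lets the expected-accuracy tables below disable individual models by commenting them out. A minimal sketch of the behavior (the CSV excerpt here is hypothetical):

import io
import pandas as pd

# Hypothetical excerpt from an expected-accuracy CSV; the '#' line is skipped.
csv_text = """name,training,inference
DistilBertForMaskedLM,pass,pass
# Skip yolov3 for a known torchbench issue
yolov3,eager_fail_to_run,eager_fail_to_run
"""

data = pd.read_csv(io.StringIO(csv_text), comment='#')
print(data["name"].tolist())  # ['DistilBertForMaskedLM', 'yolov3']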
@@ -13,7 +13,8 @@ CamemBert,pass,pass,pass,pass,pass
DebertaForMaskedLM,pass,pass,pass,pass,pass
DebertaForQuestionAnswering,pass,pass,pass,pass,pass
DebertaV2ForMaskedLM,pass_due_to_skip,pass_due_to_skip,pass_due_to_skip,pass_due_to_skip,pass_due_to_skip
- DebertaV2ForQuestionAnswering,pass,pass,pass,pass,pass
+ # Skip DebertaV2ForQuestionAnswering issue: https://github.com/intel/torch-xpu-ops/issues/1216
+ DebertaV2ForQuestionAnswering,fail_accuracy,fail_accuracy,fail_accuracy,pass,pass
DistilBertForMaskedLM,pass,pass,pass,pass,pass
DistilBertForQuestionAnswering,pass,pass,pass,pass,pass
DistillGPT2,pass,pass,pass,pass,pass
@@ -102,5 +102,6 @@ torch_multimodal_clip,pass,pass,pass,eager_fail_to_run,eager_fail_to_run
tts_angular,pass,eager_fail_to_run,eager_fail_to_run,eager_fail_to_run,eager_fail_to_run
vgg16,pass,pass,pass,pass,pass
vision_maskrcnn,pass,pass,pass,eager_fail_to_run,eager_fail_to_run
- yolov3,pass,pass,pass,pass,pass
+ # Skip yolov3 for known torchbench issue: https://github.com/intel/torch-xpu-ops/issues/1229
+ yolov3,eager_fail_to_run,eager_fail_to_run,eager_fail_to_run,eager_fail_to_run,eager_fail_to_run
hf_Roberta_base,pass,pass,pass,pass,pass
@@ -102,5 +102,6 @@ torch_multimodal_clip,pass,pass,pass,eager_fail_to_run,eager_fail_to_run
tts_angular,pass,eager_fail_to_run,eager_fail_to_run,eager_fail_to_run,eager_fail_to_run
vgg16,pass,pass,pass,pass,pass
vision_maskrcnn,pass,pass,pass,eager_fail_to_run,eager_fail_to_run
- yolov3,pass,pass,pass,pass,pass
+ # Skip yolov3 for known torchbench issue: https://github.com/intel/torch-xpu-ops/issues/1229
+ yolov3,eager_fail_to_run,eager_fail_to_run,eager_fail_to_run,eager_fail_to_run,eager_fail_to_run
hf_Roberta_base,pass,pass,pass,pass,pass
31 changes: 31 additions & 0 deletions .github/workflows/_linux_transformers.yml
@@ -51,6 +51,7 @@ jobs:
python: ${{ inputs.python != '' && inputs.python || '3.10' }}
pytorch: ${{ inputs.pytorch != '' && inputs.pytorch || 'nightly' }}
transformers: ${{ inputs.transformers != '' && inputs.transformers || 'v4.47.0' }}
+ PYTORCH_DEBUG_XPU_FALLBACK: '1'
TRANSFORMERS_TEST_DEVICE_SPEC: 'spec.py'
steps:
- name: Checkout torch-xpu-ops
@@ -112,6 +113,8 @@ jobs:
lspci -d ::0380 | tee ${{ github.workspace }}/transformers/tests_log/lspci_0380.txt
echo "GPU render nodes:"
cat /sys/class/drm/render*/device/device | tee ${{ github.workspace }}/transformers/tests_log/device_IDs.txt
echo "xpu-smi output:"
xpu-smi discovery -y --json --dump -1
- name: Sanity check installed packages
run: |
source activate huggingface_transformers_test
@@ -267,6 +270,32 @@ jobs:
echo "| $test_group | $file | $error | $comment |"
done <_failures_uniq.txt
} >> $GITHUB_STEP_SUMMARY
+ - name: Print not implemented XPU backend ops
+   run: |
+     cd transformers
+     {
+       echo "### Not implemented ops"
+       echo "| Test group | Operator | Status |"
+       echo "| --- | --- | --- |"
+       rm -rf _ops.txt && touch _ops.txt
+       for log in $(find reports -name failures_line.txt); do
+         # Each failures_line.txt is located in: reports/$test_group/failures_line.txt
+         test_group=$(echo $log | cut -f2 -d/)
+         ops=$(grep NotImplementedError $log | grep "for the XPU device" | sed "s/.*The operator '\(.*\)' is not.*/\1/")
+         for op in $ops; do
+           echo "| $test_group | <pre>$op</pre> | not implemented |" >> _ops.txt
+         done
+       done
+       for log in $(find reports -name warnings.txt); do
+         # Each warnings.txt is located in: reports/$test_group/warnings.txt
+         test_group=$(echo $log | cut -f2 -d/)
+         ops=$(grep UserWarning $log | grep "on the XPU backend" | sed "s/.*The operator '\(.*\) on the XPU.*/\1/")
+         for op in $ops; do
+           echo "| $test_group | <pre>$op</pre> | fallback to CPU happens |" >> _ops.txt
+         done
+       done
+       sort _ops.txt | uniq
+     } >> $GITHUB_STEP_SUMMARY
- name: Print annotations
if: ${{ ! cancelled() }}
run: |
@@ -313,6 +342,8 @@ jobs:
# printing annotations with key environment variables
echo "| jobs.$GITHUB_JOB.env.ZE_AFFINITY_MASK | $ZE_AFFINITY_MASK |"
echo "| jobs.$GITHUB_JOB.env.NEOReadDebugKeys | $NEOReadDebugKeys |"
echo "| jobs.$GITHUB_JOB.env.PYTORCH_ENABLE_XPU_FALLBACK | $PYTORCH_ENABLE_XPU_FALLBACK |"
echo "| jobs.$GITHUB_JOB.env.PYTORCH_DEBUG_XPU_FALLBACK | $PYTORCH_DEBUG_XPU_FALLBACK |"
} >> $GITHUB_STEP_SUMMARY
- name: Upload Test log
if: ${{ ! cancelled() }}
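
The new "Print not implemented XPU backend ops" step scrapes the per-group test reports for operators that raised NotImplementedError or emitted a CPU-fallback UserWarning (those warnings are produced when PYTORCH_DEBUG_XPU_FALLBACK=1, the env var added above). A rough Python equivalent of the two grep/sed extractions, with made-up log lines for illustration:

import re

# Made-up log lines shaped like the ones the step greps for.
lines = [
    "NotImplementedError: The operator 'aten::foo.out' is not currently implemented for the XPU device.",
    "UserWarning: The operator 'aten::bar' is not currently supported on the XPU backend and will fall back to run on the CPU.",
]

not_implemented = re.compile(r"The operator '([^']+)'.*for the XPU device")
cpu_fallback = re.compile(r"The operator '([^']+)'.*on the XPU backend")

for line in lines:
    if m := not_implemented.search(line):
        print(f"| {m.group(1)} | not implemented |")
    elif m := cpu_fallback.search(line):
        print(f"| {m.group(1)} | fallback to CPU happens |")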
2 changes: 1 addition & 1 deletion .github/workflows/nightly_ondemand.yml
@@ -156,7 +156,7 @@ jobs:
fi
echo "TORCH_BRANCH_ID=$(git rev-parse --abbrev-ref HEAD)" |tee -a "${GITHUB_OUTPUT}" >> "${GITHUB_ENV}"
echo "TORCH_COMMIT_ID=$(git rev-parse HEAD)" |tee -a "${GITHUB_OUTPUT}" >> "${GITHUB_ENV}"
echo "TORCHBENCH_COMMIT_ID=$(<third_party/torch-xpu-ops/.github/ci_commit_pins/torchbench.txt)" |tee -a "${GITHUB_OUTPUT}" >> "${GITHUB_ENV}"
echo "TORCHBENCH_COMMIT_ID=$(<.github/ci_commit_pins/torchbench.txt)" |tee -a "${GITHUB_OUTPUT}" >> "${GITHUB_ENV}"
echo "TORCHVISION_COMMIT_ID=$(<.github/ci_commit_pins/vision.txt)" |tee -a "${GITHUB_OUTPUT}" >> "${GITHUB_ENV}"
echo "TORCHAUDIO_COMMIT_ID=$(<.github/ci_commit_pins/audio.txt)" |tee -a "${GITHUB_OUTPUT}" >> "${GITHUB_ENV}"
echo "TRANSFORMERS_VERSION=$(<.ci/docker/ci_commit_pins/huggingface.txt)" |tee -a "${GITHUB_OUTPUT}" >> "${GITHUB_ENV}"
2 changes: 1 addition & 1 deletion .github/workflows/nightly_ondemand_rolling.yml
@@ -158,7 +158,7 @@ jobs:
fi
echo "TORCH_BRANCH_ID=$(git rev-parse --abbrev-ref HEAD)" |tee -a "${GITHUB_OUTPUT}" >> "${GITHUB_ENV}"
echo "TORCH_COMMIT_ID=$(git rev-parse HEAD)" |tee -a "${GITHUB_OUTPUT}" >> "${GITHUB_ENV}"
echo "TORCHBENCH_COMMIT_ID=$(<third_party/torch-xpu-ops/.github/ci_commit_pins/torchbench.txt)" |tee -a "${GITHUB_OUTPUT}" >> "${GITHUB_ENV}"
echo "TORCHBENCH_COMMIT_ID=$(<.github/ci_commit_pins/torchbench.txt)" |tee -a "${GITHUB_OUTPUT}" >> "${GITHUB_ENV}"
echo "TORCHVISION_COMMIT_ID=$(<.github/ci_commit_pins/vision.txt)" |tee -a "${GITHUB_OUTPUT}" >> "${GITHUB_ENV}"
echo "TORCHAUDIO_COMMIT_ID=$(<.github/ci_commit_pins/audio.txt)" |tee -a "${GITHUB_OUTPUT}" >> "${GITHUB_ENV}"
echo "TRANSFORMERS_VERSION=$(<.ci/docker/ci_commit_pins/huggingface.txt)" |tee -a "${GITHUB_OUTPUT}" >> "${GITHUB_ENV}"
2 changes: 1 addition & 1 deletion .github/workflows/nightly_ondemand_whl.yml
@@ -132,7 +132,7 @@ jobs:
echo "TORCHAUDIO_COMMIT_ID=$(python -c 'import torchaudio; print(torchaudio.version.git_version)')" |tee -a "${GITHUB_OUTPUT}" >> "${GITHUB_ENV}"
echo "TRITON_COMMIT_ID=$(python -c 'import triton; print(triton.__version__)')" |tee -a "${GITHUB_OUTPUT}" >> "${GITHUB_ENV}"
cd ../pytorch
echo "TORCHBENCH_COMMIT_ID=$(<third_party/torch-xpu-ops/.github/ci_commit_pins/torchbench.txt)" |tee -a "${GITHUB_OUTPUT}" >> "${GITHUB_ENV}"
echo "TORCHBENCH_COMMIT_ID=$(<.github/ci_commit_pins/torchbench.txt)" |tee -a "${GITHUB_OUTPUT}" >> "${GITHUB_ENV}"
echo "TRANSFORMERS_VERSION=$(<.ci/docker/ci_commit_pins/huggingface.txt)" |tee -a "${GITHUB_OUTPUT}" >> "${GITHUB_ENV}"
echo "TIMM_COMMIT_ID=$(<.ci/docker/ci_commit_pins/timm.txt)" |tee -a "${GITHUB_OUTPUT}" >> "${GITHUB_ENV}"
echo "MODEL_ONLY_NAME=${{ inputs.model }}" |tee -a "${GITHUB_OUTPUT}" >> "${GITHUB_ENV}"
6 changes: 3 additions & 3 deletions .github/workflows/pull.yml
@@ -90,7 +90,7 @@ jobs:
cd ../pytorch
echo "TRITON_COMMIT_ID=$(<.ci/docker/ci_commit_pins/triton-xpu.txt)" >> "${GITHUB_ENV}"
echo "TORCHVISION_COMMIT_ID=$(<.github/ci_commit_pins/vision.txt)" >> "${GITHUB_ENV}"
echo "TORCHBENCH_COMMIT_ID=$(<third_party/torch-xpu-ops/.github/ci_commit_pins/torchbench.txt)" >> "${GITHUB_ENV}"
echo "TORCHBENCH_COMMIT_ID=$(<.github/ci_commit_pins/torchbench.txt)" >> "${GITHUB_ENV}"
echo "TORCHAUDIO_COMMIT_ID=$(<.github/ci_commit_pins/audio.txt)" >> "${GITHUB_ENV}"
echo "TRANSFORMERS_VERSION=$(<.ci/docker/ci_commit_pins/huggingface.txt)" >> "${GITHUB_ENV}"
echo "TIMM_COMMIT_ID=$(<.ci/docker/ci_commit_pins/timm.txt)" >> "${GITHUB_ENV}"
@@ -144,9 +144,9 @@ jobs:
run: |
rm -rf ${{ github.workspace }}/upload_files
cp -r ${{ github.workspace }}/../pytorch/inductor_log ${{ github.workspace }}/upload_files
failed_case=$(grep "Real failed: models: *[1-9]" ${{ github.workspace }}/upload_files/summary_accuracy.log |wc -l || true)
failed_case=$(grep "Real failed models: *[1-9]" ${{ github.workspace }}/upload_files/summary_accuracy.log |wc -l || true)
if [ ${failed_case} -ne 0 ];then
grep -E "Real failed: models: [1-9]|Summary for" ${{ github.workspace }}/summary_accuracy.log
grep -E "Real failed models: [1-9]|Summary for" ${{ github.workspace }}/upload_files/summary_accuracy.log
exit 1
fi
- name: Upload Inductor XPU E2E Data
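
The corrected pattern matches summary lines of the form "Real failed models: N"; the stray colon in the old pattern ("Real failed: models:") did not match that format, so real failures could slip through unreported, and the second grep now also reads the copied log under upload_files. A sketch of the same gate in Python, assuming that summary format:

import re
import sys

# Assumed shape of summary_accuracy.log lines.
summary_lines = [
    "Summary for huggingface bfloat16 inference accuracy",
    "Real failed models: 0",
    "Summary for torchbench bfloat16 training accuracy",
    "Real failed models: 2",
]

failed = [ln for ln in summary_lines if re.search(r"Real failed models: *[1-9]", ln)]
if failed:
    print("\n".join(failed))
    sys.exit(1)  # mirror the workflow's exit 1 so the job fails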
11 changes: 11 additions & 0 deletions src/ATen/native/xpu/SoftMax.cpp
@@ -76,6 +76,17 @@ TORCH_IMPL_FUNC(log_softmax_xpu_out)
xpu::_log_softmax_kernel(input, dim, half_to_float, output);
}

+ Tensor _safe_softmax_xpu(
+     const Tensor& self,
+     int64_t dim,
+     std::optional<ScalarType> dtype) {
+   // TODO: uncomment after XPU softmax support half_to_float=true
+   // if (self.scalar_type() == ScalarType::Half && dtype == ScalarType::Float)
+   //   return xpu::_safe_softmax_kernel(self, dim_, true);
+   Tensor converted = dtype.has_value() ? self.toType(dtype.value()) : self;
+   return xpu::_safe_softmax_kernel(converted, dim, false);
+ }

Tensor masked_softmax_xpu(
const Tensor& input_,
const Tensor& mask_,
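
The point of a "safe" softmax is how fully-masked rows are treated: a row that is entirely -inf yields NaN under a plain softmax (0/0), while the safe variant returns zeros for it, which is what attention code expects for fully-masked queries. A reference sketch of that semantic in Python (this models the intended behavior, not the XPU kernel itself):

import torch

def safe_softmax_reference(x: torch.Tensor, dim: int) -> torch.Tensor:
    # A plain softmax maps an all -inf row to NaN; the "safe" variant
    # zeroes such rows instead.
    out = torch.softmax(x, dim)
    fully_masked = x.eq(float("-inf")).all(dim, keepdim=True)
    return torch.where(fully_masked, torch.zeros_like(out), out)

x = torch.tensor([[0.0, 1.0], [float("-inf"), float("-inf")]])
print(safe_softmax_reference(x, dim=-1))
# tensor([[0.2689, 0.7311],
#         [0.0000, 0.0000]])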
29 changes: 10 additions & 19 deletions src/ATen/native/xpu/sycl/BatchNormKernels.cpp
@@ -1287,7 +1287,7 @@ struct BatchNormTransformInputVectorizedKernelFunctor {
} else {
invstd =
static_cast<stat_accscalar_t>(1) /
-             device_sqrt(
+             std::sqrt(
static_cast<stat_accscalar_t>(var_or_invstd_[plane]) + epsilon_);
}

@@ -1302,25 +1302,16 @@
for (index_t feature_vec_begin = item.get_local_id(1) * VEC_SIZE;
feature_vec_begin < fs;
feature_vec_begin += VEC_SIZE * item.get_local_range(1)) {
-       auto remaining = fs - feature_vec_begin;
-       if (remaining < VEC_SIZE) {
-         for (index_t idx = 0; idx < remaining; ++idx) {
-           index_t feature = feature_vec_begin + idx;
-           o[feature] = static_cast<input_scalar_t>(
-               gamma * (i[feature] - mean) * invstd + beta);
-         }
-       } else {
-         using vec_t = memory::aligned_vector<input_scalar_t, VEC_SIZE>;
-         vec_t vec;
+       using vec_t = memory::aligned_vector<input_scalar_t, VEC_SIZE>;
+       vec_t vec;
#pragma unroll
-         for (int vt = 0; vt < VEC_SIZE; ++vt) {
-           index_t feature = feature_vec_begin + vt;
-           vec[vt] = static_cast<input_scalar_t>(
-               gamma * (i[feature] - mean) * invstd + beta);
-         }
-         input_scalar_t* write_ptr = &o[feature_vec_begin];
-         *(reinterpret_cast<vec_t*>(write_ptr)) = vec;
-       }
+       for (int vt = 0; vt < VEC_SIZE; ++vt) {
+         index_t feature = feature_vec_begin + vt;
+         vec[vt] = static_cast<input_scalar_t>(
+             gamma * (i[feature] - mean) * invstd + beta);
+       }
+       input_scalar_t* write_ptr = &o[feature_vec_begin];
+       *(reinterpret_cast<vec_t*>(write_ptr)) = vec;
}
}
}
@@ -1459,7 +1450,7 @@ void batch_norm_elemt_template(
auto output_ptr = (char*)output_reshaped.data_ptr();
if (output_reshaped.is_contiguous() &&
memory::can_vectorize_up_to<input_scalar_t>(output_ptr) >= 4 &&
-       sizeof(input_scalar_t) < sizeof(float)) {
+       sizeof(input_scalar_t) < sizeof(float) && input.size(2) % 4 == 0) {
auto kfn = BatchNormTransformInputVectorizedKernelFunctor<
4,
input_scalar_t,
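
Dropping the scalar tail loop is only safe because the vectorized kernel is now dispatched solely when the innermost dimension is a multiple of the vector width (the new input.size(2) % 4 == 0 condition), so every 4-wide vec_t store stays in bounds. A small Python model of that guard (names are illustrative):

VEC_SIZE = 4  # the kernel's vector width

def every_chunk_in_bounds(fs: int) -> bool:
    # Each vectorized store writes [begin, begin + VEC_SIZE); when
    # fs % VEC_SIZE == 0, no chunk can run past the end of the row.
    return all(begin + VEC_SIZE <= fs for begin in range(0, fs, VEC_SIZE))

print(every_chunk_in_bounds(128))  # True:  eligible for the vectorized kernel
print(every_chunk_in_bounds(130))  # False: the last chunk would overrun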
