Skip to content

Commit

Permalink
fix test err on 2.1
Browse files Browse the repository at this point in the history
  • Loading branch information
fandaoyi committed Dec 7, 2023
1 parent b734b96 commit ebbdbd9
Show file tree
Hide file tree
Showing 6 changed files with 26 additions and 16 deletions.
8 changes: 4 additions & 4 deletions dipu/scripts/autogen_diopi_wrapper/diopi_functions.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -643,18 +643,18 @@
::diopiSize_t diopi_size = toDiopiSize(dim);
interface: diopiMean(ctx, out, self_dtype_diopi, diopi_size);

- schema: "std.correction(Tensor self, int[1]? dim=None, *, int? correction=None, bool keepdim=False) -> Tensor"
- schema: "std.correction(Tensor self, int[1]? dim=None, *, Scalar? correction=None, bool keepdim=False) -> Tensor"
custom_code_at_the_beginning: |
std::vector<int64_t> output_shape = infer_reduce_op_shape(self.sizes(), dim.value_or(std::vector<int64_t>()), keepdim);
auto out = at::empty(output_shape, self.options());
bool unbiased = correction.value_or(1) == 1;
bool unbiased = correction.value_or(1).toLong() == 1;
::diopiSize_t diopi_size = toDiopiSize(dim);
interface: diopiStd(ctx, out, self, diopi_size, unbiased);

- schema: "std.correction_out(Tensor self, int[1]? dim=None, *, int? correction=None, bool keepdim=False, Tensor(a!) out) -> Tensor(a!)"
- schema: "std.correction_out(Tensor self, int[1]? dim=None, *, Scalar? correction=None, bool keepdim=False, Tensor(a!) out) -> Tensor(a!)"
custom_code_at_the_beginning: |
::diopiSize_t diopi_size = toDiopiSize(dim);
bool unbiased = correction.value_or(1) == 1;
bool unbiased = correction.value_or(1).toLong() == 1;
interface: diopiStd(ctx, out, self, diopi_size, unbiased);

- schema: "linear_backward(Tensor input, Tensor grad_output, Tensor weight, bool[3] output_mask) -> (Tensor grad_input, Tensor grad_weight, Tensor grad_bias)"
Expand Down
5 changes: 3 additions & 2 deletions dipu/tests/python/individual_scripts/test_op_benchmark.py
Original file line number Diff line number Diff line change
Expand Up @@ -58,8 +58,9 @@ def batched_dot_bmm(a, b):
# description is the column
label = "Batched dot"
sub_label = f"[{b}, {n}]"
x = torch.ones((b, n))
for num_threads in [1, 4, 16, 32]:
x = torch.ones((b, n)).cuda()
# cuda tensor, not so many dispatch threads in actual case. 16, 32]:
for num_threads in [1, 4]:
results.append(
benchmark.Timer(
stmt="batched_dot_mul_sum(x, x)",
Expand Down
17 changes: 12 additions & 5 deletions dipu/tests/run_nv_tests.sh
Original file line number Diff line number Diff line change
Expand Up @@ -7,8 +7,15 @@ function run_dipu_tests {
unset DIPU_DUMP_OP_ARGS
export PYTHONPATH=${DIPU_ROOT}/../:${PYTHONPATH}
${CDIR}/python/run_tests.sh
echo "fill_.Scalar" >> .dipu_force_fallback_op_list.config
run_test "${PYTORCH_DIR}/test/test_tensor_creation_ops.py" "$@" -v -f TestTensorCreationDIPU # --locals -f
echo "" > .dipu_force_fallback_op_list.config
# run_test "${PYTORCH_DIR}/test/test_reductions.py" "$@" -v -f TestReductionsDIPU

run_test "${PYTORCH_TEST_DIR}/test/nn/test_convolution.py" -v TestConvolutionNNDeviceTypeDIPU
# run_test "${PYTORCH_TEST_DIR}/test/test_linalg.py" "$@" -v TestLinalgDIPU

# mock cuda cause test number err, temporary ignore
# run_test "${PYTORCH_TEST_DIR}/test/test_testing.py" "$@" -v TestTestParametrizationDeviceTypeDIPU TestTestingDIPU
run_test "${PYTORCH_TEST_DIR}/test/test_type_hints.py" "$@" -v
run_test "${PYTORCH_TEST_DIR}/test/test_type_info.py" "$@" -v
Expand All @@ -17,14 +24,14 @@ function run_dipu_tests {
# run_test "${PYTORCH_TEST_DIR}/test/test_binary_ufuncs.py" "$@" -v TestBinaryUfuncsDIPU
# run_test "${PYTORCH_TEST_DIR}/test/test_torch.py" "$@" -v TestTorchDeviceTypeDIPU #--subprocess
#run_test "${PYTORCH_TEST_DIR}/test/test_indexing.py" "$@" -v TestIndexingDIPU
#run_test "${PYTORCH_TEST_DIR}/test/test_indexing.py" "$@" -v NumpyTestsDIPU
# run_test "${PYTORCH_TEST_DIR}/test/test_view_ops.py" "$@" -v TestViewOpsDIPU
run_test "${PYTORCH_TEST_DIR}/test/test_indexing.py" "$@" -v NumpyTestsDIPU
run_test "${PYTORCH_TEST_DIR}/test/test_view_ops.py" "$@" -v TestViewOpsDIPU
# run_test "${PYTORCH_TEST_DIR}/test/test_type_promotion.py" "$@" -v TestTypePromotionDIPU
# run_test "${PYTORCH_TEST_DIR}/test/test_nn.py" "$@" -v TestNN
# run_test "${PYTORCH_TEST_DIR}/test/test_ops_fwd_gradients.py" "$@" -v TestFwdGradientsDIPU
# run_test "${PYTORCH_TEST_DIR}/test/test_ops_gradients.py" "$@" -v TestBwdGradientsDIPU
run_test "${PYTORCH_TEST_DIR}/test/test_ops_fwd_gradients.py" "$@" -v TestFwdGradientsDIPU
run_test "${PYTORCH_TEST_DIR}/test/test_ops_gradients.py" "$@" -v TestBwdGradientsDIPU
# run_test "${PYTORCH_TEST_DIR}/test/test_ops.py" "$@" -v
# run_test "${PYTORCH_TEST_DIR}/test/test_shape_ops.py" "$@" -v TestShapeOpsDIPU
run_test "${PYTORCH_TEST_DIR}/test/test_shape_ops.py" "$@" -v TestShapeOpsDIPU
}

if [ "$LOGFILE" != "" ]; then
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -87,16 +87,17 @@ at::Tensor& custom_fallback_dipu__amp_update_scale_(at::Tensor& current_scale,
"found_inf must be a float tensor.");
if (static_cast<bool>(found_inf.item<float>())) {
current_scale *= backoff_factor;
growth_tracker[0] = 0;
growth_tracker.fill_(c10::Scalar(0));
} else {
// Entering this branch means we just carried out a successful step,
// so growth_tracker is incremented before comparing to growth_interval.
auto successful = growth_tracker.item<int>() + 1;
if (successful == growth_interval) {
current_scale *= growth_factor;
growth_tracker[0] = 0;
growth_tracker.fill_(c10::Scalar(0));
} else {
growth_tracker[0] = successful;
//growth_tracker in torch 2.1 is a scalar tensor. in 2.0 is a size=1 tensor.
growth_tracker.fill_(c10::Scalar(successful));
}
}
return current_scale;
Expand Down
3 changes: 2 additions & 1 deletion dipu/torch_dipu/csrc_dipu/runtime/distributed/c10dOps.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -62,7 +62,8 @@ std::tuple<std::vector<at::Tensor>, c10::intrusive_ptr<Work>> broadcast_dipu_(
std::tuple<std::vector<at::Tensor>, c10::intrusive_ptr<Work>> allreduce_dipu_(
at::TensorList tensors,
const c10::intrusive_ptr<ProcessGroup>& process_group,
const c10::intrusive_ptr<ReduceOp>& reduce_op, int64_t timeout) {
const c10::intrusive_ptr<ReduceOp>& reduce_op,
const c10::optional<at::Tensor>& sparse_indices, int64_t timeout) {
auto tensor_vec = tensors.vec();
auto work =
process_group->getBackend(dipu::DIPU_DEVICE_TYPE)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@

namespace dipu {

static const size_t states_size = 200 * sizeof(4120);
static const size_t states_size = 0; // 200 * sizeof(4120);
static const size_t seed_size = sizeof(uint64_t);
static const size_t offset_size = sizeof(int64_t);
static const size_t total_size = states_size + seed_size + offset_size;
Expand Down

0 comments on commit ebbdbd9

Please sign in to comment.