From 09aefe150248b321bc3c271cb304a638414f46e6 Mon Sep 17 00:00:00 2001
From: Sergii Dymchenko
Date: Thu, 29 Feb 2024 08:29:10 +0000
Subject: [PATCH] Fix ouput typos (#120870)

Pull Request resolved: https://github.com/pytorch/pytorch/pull/120870
Approved by: https://github.com/clee2000
---
 aten/src/ATen/native/Normalization.cpp           |  2 +-
 aten/src/ATen/native/cuda/Reduce.cuh             |  2 +-
 .../ATen/native/nested/NestedTensorBackward.cpp  |  8 ++++----
 modules/detectron/select_smooth_l1_loss_op.cu    |  2 +-
 modules/detectron/smooth_l1_loss_op.cu           |  2 +-
 test/test_ops.py                                 |  2 +-
 test/test_schema_check.py                        |  2 +-
 .../distributed/autograd/engine/dist_engine.cpp  |  4 ++--
 .../mobile/compatibility/backport_manager.cpp    | 16 ++++++++--------
 torch/csrc/jit/runtime/jit_trace.cpp             |  6 +++---
 torch/distributed/_spmd/iter_graph_module.py     |  2 +-
 torch/distributed/_tensor/ops/matrix_ops.py      |  2 +-
 torch/distributed/_tensor/ops/tensor_ops.py      |  2 +-
 torch/distributed/_tensor/tp_conv.py             |  2 +-
 torch/functional.py                              |  2 +-
 torch/onnx/_internal/jit_utils.py                |  4 ++--
 .../_internal/distributed/distributed_test.py    |  2 +-
 17 files changed, 31 insertions(+), 31 deletions(-)

diff --git a/aten/src/ATen/native/Normalization.cpp b/aten/src/ATen/native/Normalization.cpp
index d27f9c588aa21..82640c682ce67 100644
--- a/aten/src/ATen/native/Normalization.cpp
+++ b/aten/src/ATen/native/Normalization.cpp
@@ -409,7 +409,7 @@ std::tuple<Tensor, Tensor, Tensor> batch_norm_backward_cpu_template(
         invstd = 1 / std::sqrt(running_var_a[f] + eps);
       }
 
-      // dot product of the Q(X) and gradOuput
+      // dot product of the Q(X) and gradOutput
       accscalar_t dotp = 0;
       reduce_iter_local.unsafe_replace_operand(
           0, in_data + f * in_channel_stride);
diff --git a/aten/src/ATen/native/cuda/Reduce.cuh b/aten/src/ATen/native/cuda/Reduce.cuh
index 19b478519e72f..1f67ee3ea63e1 100644
--- a/aten/src/ATen/native/cuda/Reduce.cuh
+++ b/aten/src/ATen/native/cuda/Reduce.cuh
@@ -1054,7 +1054,7 @@ ReduceConfig setReduceConfig(const TensorIterator& iter){
   // Case 1: "vectorize along input"
   // This case happens when we are reducing along fastest moving dimesion. In such case, threads
   // with the same threadIdx.y works on the same reduction cooperatively and will produce results
-  // for the same output. In such case, values in each loaded vector always correspond to the same ouput.
+  // for the same output. In such case, values in each loaded vector always correspond to the same output.
   //
   // Case 2: "vectorize along output"
   // This case happens when the fastest moving dimesion is not the dimension of reduction. In such case,
diff --git a/aten/src/ATen/native/nested/NestedTensorBackward.cpp b/aten/src/ATen/native/nested/NestedTensorBackward.cpp
index ef992a37c8688..54304c8f4f33f 100644
--- a/aten/src/ATen/native/nested/NestedTensorBackward.cpp
+++ b/aten/src/ATen/native/nested/NestedTensorBackward.cpp
@@ -44,16 +44,16 @@ std::tuple<Tensor, Tensor, Tensor> nested_linear_backward(
     return std::tuple<Tensor, Tensor, Tensor>{Tensor(), Tensor(), Tensor()};
   }
   Tensor grad_input, grad_weight, grad_bias;
-  auto grad_ouput_contiguous = grad_output.contiguous();
-  auto* nt_grad_output = get_nested_tensor_impl(grad_ouput_contiguous);
+  auto grad_output_contiguous = grad_output.contiguous();
+  auto* nt_grad_output = get_nested_tensor_impl(grad_output_contiguous);
   auto* nt_input = get_nested_tensor_impl(input);
   TORCH_INTERNAL_ASSERT(nt_grad_output != nullptr);
   TORCH_INTERNAL_ASSERT(nt_input != nullptr);
   TORCH_INTERNAL_ASSERT(nested_tensor_impl_is_contiguous(nt_grad_output));
-  auto grad_ouput_buffer = nt_grad_output->get_buffer();
+  auto grad_output_buffer = nt_grad_output->get_buffer();
   auto input_buffer = nt_input->get_buffer();
 
-  auto reshaped_grad = grad_ouput_buffer.reshape({-1, weight.size(0)});
+  auto reshaped_grad = grad_output_buffer.reshape({-1, weight.size(0)});
 
   if (output_mask[0]) {
     auto grad_input_buffer = at::mm(reshaped_grad, weight).view({-1});
diff --git a/modules/detectron/select_smooth_l1_loss_op.cu b/modules/detectron/select_smooth_l1_loss_op.cu
index ce68fcff634d6..72f1d563b4c92 100644
--- a/modules/detectron/select_smooth_l1_loss_op.cu
+++ b/modules/detectron/select_smooth_l1_loss_op.cu
@@ -149,7 +149,7 @@ bool SelectSmoothL1LossGradientOp<float, CUDAContext>::RunOnDevice() {
   auto& Y = Input(1);
   auto& L = Input(2);
   auto& S = Input(3);
-  // Below is gradient of net w.r.t. avg_loss ("gradOuput"), should be all 1's
+  // Below is gradient of net w.r.t. avg_loss ("gradOutput"), should be all 1's
   auto& d_avg_loss = Input(4);
   auto* d_Y_hat = Output(0, Y_hat.sizes(), at::dtype<float>());
   // gradient of net w.r.t. Y_hat ("gradInput")
diff --git a/modules/detectron/smooth_l1_loss_op.cu b/modules/detectron/smooth_l1_loss_op.cu
index ea835a4bc2b97..ad2d9148c72f0 100644
--- a/modules/detectron/smooth_l1_loss_op.cu
+++ b/modules/detectron/smooth_l1_loss_op.cu
@@ -128,7 +128,7 @@ bool SmoothL1LossGradientOp<float, CUDAContext>::RunOnDevice() {
   auto& Y = Input(1);
   auto& alpha_in = Input(2);
   auto& alpha_out = Input(3);
-  auto& d_avg_loss = Input(4); // gradient of net w.r.t. avg_loss ("gradOuput")
+  auto& d_avg_loss = Input(4); // gradient of net w.r.t. avg_loss ("gradOutput")
   // We intentially don't compute gradients for Y, alpha_{in,out} since they
   // are not needed (can change in the future if desired)
 
diff --git a/test/test_ops.py b/test/test_ops.py
index 93e010f51a966..9d99a2cc150b6 100644
--- a/test/test_ops.py
+++ b/test/test_ops.py
@@ -1790,7 +1790,7 @@ def check_inplace_view(func, input, rs, input_size, input_strides):
 
 # A mode that when enabled runs correctness checks to ensure
 # that operators have expected tags based on their input and
-# ouput tensor properties
+# output tensor properties
 class TestTagsMode(TorchDispatchMode):
     def __torch_dispatch__(self, func, types, args=(), kwargs=None):
         if isinstance(args[0], torch.Tensor):
diff --git a/test/test_schema_check.py b/test/test_schema_check.py
index 07e6b7b001b4d..7233e5f54a9d8 100644
--- a/test/test_schema_check.py
+++ b/test/test_schema_check.py
@@ -295,7 +295,7 @@ def test_schema_check_mode_functionality_with_multiple_outputs(self):
         self.assertEqual(m_expected, m_actual)
         self.assertEqual(e_expected, e_actual)
 
-    # Tests that SchemaCheckMode wraps Torch.tensor with aliasing ouputs due to aliasing inputs
+    # Tests that SchemaCheckMode wraps Torch.tensor with aliasing outputs due to aliasing inputs
     def test_schema_check_mode_functionality_with_multiple_outputs_aliasing(self):
         x = torch.rand((3, 3))
         actual = torch.zeros(3)
diff --git a/torch/csrc/distributed/autograd/engine/dist_engine.cpp b/torch/csrc/distributed/autograd/engine/dist_engine.cpp
index 907ca7392f27c..062a15da4964c 100644
--- a/torch/csrc/distributed/autograd/engine/dist_engine.cpp
+++ b/torch/csrc/distributed/autograd/engine/dist_engine.cpp
@@ -62,9 +62,9 @@ class DistAccumulateGradCaptureHook
       autogradContext_->accumulateGrad(
           accumulateGrad_->variable, inputGrads[0], 3 /* num_expected_refs */);
     }
-    const variable_list kEmptyOuput;
+    const variable_list kEmptyOutput;
     for (const auto& hook : accumulateGrad_->post_hooks()) {
-      (*hook)(kEmptyOuput, inputGrads);
+      (*hook)(kEmptyOutput, inputGrads);
     }
     return inputGrads[0];
   }
diff --git a/torch/csrc/jit/mobile/compatibility/backport_manager.cpp b/torch/csrc/jit/mobile/compatibility/backport_manager.cpp
index 884ad1a973a4a..09c5df58f0bec 100644
--- a/torch/csrc/jit/mobile/compatibility/backport_manager.cpp
+++ b/torch/csrc/jit/mobile/compatibility/backport_manager.cpp
@@ -187,10 +187,10 @@ std::stringstream update_bytecode_version(
       "bytecode",
   };
 
-  std::stringstream ouput_model_stream;
+  std::stringstream output_model_stream;
   auto writer_func = [&](const void* buf, size_t nbytes) -> size_t {
-    ouput_model_stream.write(static_cast<const char*>(buf), nbytes);
-    return !ouput_model_stream ? 0 : nbytes;
+    output_model_stream.write(static_cast<const char*>(buf), nbytes);
+    return !output_model_stream ? 0 : nbytes;
   };
 
   PyTorchStreamWriter writer_bytecode(writer_func);
@@ -218,7 +218,7 @@ std::stringstream update_bytecode_version(
       /*use_storage_context=*/true,
       storage_context);
 
-  return ouput_model_stream;
+  return output_model_stream;
 }
 
 } // namespace
@@ -307,10 +307,10 @@ std::stringstream backport_v5_to_v4(std::stringstream& input_model_stream) {
       "bytecode",
   };
 
-  std::stringstream ouput_model_stream;
+  std::stringstream output_model_stream;
   auto writer_func = [&](const void* buf, size_t nbytes) -> size_t {
-    ouput_model_stream.write(static_cast<const char*>(buf), nbytes);
-    return !ouput_model_stream ? 0 : nbytes;
+    output_model_stream.write(static_cast<const char*>(buf), nbytes);
+    return !output_model_stream ? 0 : nbytes;
   };
 
   PyTorchStreamWriter writer(writer_func);
@@ -361,7 +361,7 @@ std::stringstream backport_v5_to_v4(std::stringstream& input_model_stream) {
   auto constants_tuple =
       c10::ivalue::Tuple::create(std::move(constants_values));
   writeArchiveV4(writer, kArchiveNameConstants, constants_tuple);
-  return ouput_model_stream;
+  return output_model_stream;
 }
 
 /*
diff --git a/torch/csrc/jit/runtime/jit_trace.cpp b/torch/csrc/jit/runtime/jit_trace.cpp
index 6a3941901d6bd..cff9d5f954e81 100644
--- a/torch/csrc/jit/runtime/jit_trace.cpp
+++ b/torch/csrc/jit/runtime/jit_trace.cpp
@@ -248,9 +248,9 @@ void insertTracingNodes(Block* block, ProfilingRecord* pr, TracingData& td) {
 
       GRAPH_DEBUG("Tracing ", getHeader(n));
       auto tracer = traceNode(n, td, stack);
-      auto ouputs_size = n->outputs().size();
-      auto iivs = pop(stack, ouputs_size);
-      for (size_t j = 0; j < ouputs_size; j++) {
+      auto outputs_size = n->outputs().size();
+      auto iivs = pop(stack, outputs_size);
+      for (size_t j = 0; j < outputs_size; j++) {
         auto& iiv = iivs[j];
         if (iiv.isTensor()) {
           auto t = iiv.toTensor();
diff --git a/torch/distributed/_spmd/iter_graph_module.py b/torch/distributed/_spmd/iter_graph_module.py
index 31243bebf91a2..f1e8e960f361b 100644
--- a/torch/distributed/_spmd/iter_graph_module.py
+++ b/torch/distributed/_spmd/iter_graph_module.py
@@ -295,7 +295,7 @@ def move_to_next_iter_before(
                 raise ValueError(
                     "The target nodes for ``move_to_next_iter_before`` must "
                     "satisfy one of the following conditions: 1) the user of the "
-                    "node is in the target nodes, 2) the user is the ouput of the "
+                    "node is in the target nodes, 2) the user is the output of the "
                     "graph, 3) there are no users -- the node is a side-effect node. "
                 )
 
diff --git a/torch/distributed/_tensor/ops/matrix_ops.py b/torch/distributed/_tensor/ops/matrix_ops.py
index 8a840a2561372..9ee8b3f2a22b6 100644
--- a/torch/distributed/_tensor/ops/matrix_ops.py
+++ b/torch/distributed/_tensor/ops/matrix_ops.py
@@ -201,7 +201,7 @@ def scaled_dot_product_attention_strategy(
         assert len(spec_list) == 6
         input_expected_specs = spec_list[3:]
         output_specs: List[Optional[DTensorSpec]] = list(spec_list[:3])
-        # fix up ouput_specs and fill in None for the int and empty tensor return values
+        # fix up output_specs and fill in None for the int and empty tensor return values
         for i in range(2, 8):
             output_specs.insert(i, None)
         if all(is_tensor_shardable(qkv_shape, spec) for spec in input_expected_specs):
diff --git a/torch/distributed/_tensor/ops/tensor_ops.py b/torch/distributed/_tensor/ops/tensor_ops.py
index bf466185a8569..3d8bbdda8943f 100644
--- a/torch/distributed/_tensor/ops/tensor_ops.py
+++ b/torch/distributed/_tensor/ops/tensor_ops.py
@@ -329,7 +329,7 @@ def gen_slice_scatter_strategy(mesh: DeviceMesh, op_schema: OpSchema) -> StrategyType:
 
 @register_op_strategy(aten._local_scalar_dense.default)
 def replica_only_strategy(mesh: DeviceMesh, op_schema: OpSchema) -> StrategyType:
-    """Only allow replication on the input/ouput."""
+    """Only allow replication on the input/output."""
     replicate_spec = DTensorSpec(mesh, tuple([Replicate()] * mesh.ndim))
     return OpStrategy([PlacementStrategy(replicate_spec)])
 
diff --git a/torch/distributed/_tensor/tp_conv.py b/torch/distributed/_tensor/tp_conv.py
index 65e120debd03a..ebcc981d2c93a 100644
--- a/torch/distributed/_tensor/tp_conv.py
+++ b/torch/distributed/_tensor/tp_conv.py
@@ -141,7 +141,7 @@ def tp_convolution(
         local_tensor_args = cast(Tuple[object, ...], local_tensor_args_list)
         local_results = op_call(*local_tensor_args, **local_tensor_kwargs)
 
-        # step3 remove extra ouputs from the results
+        # step3 remove extra outputs from the results
         padding_w = padding[1]
         w = local_results.size(3)
         if rank == 0:
diff --git a/torch/functional.py b/torch/functional.py
index c08343b9eb961..7c07ae348631b 100644
--- a/torch/functional.py
+++ b/torch/functional.py
@@ -1707,7 +1707,7 @@ def unravel_index(indices: Tensor, shape: Union[int, Sequence[int], torch.Size])
             tensor. All elements must be non-negative.
 
     Returns:
-        tuple of Tensors: Each ``i``-th tensor in the ouput corresponds with
+        tuple of Tensors: Each ``i``-th tensor in the output corresponds with
         dimension ``i`` of :attr:`shape`. Each tensor has the same shape as
         ``indices`` and contains one index into dimension ``i`` for each of the
         flat indices given by ``indices``.
diff --git a/torch/onnx/_internal/jit_utils.py b/torch/onnx/_internal/jit_utils.py
index f40e515d30a77..d192f35bd73b5 100644
--- a/torch/onnx/_internal/jit_utils.py
+++ b/torch/onnx/_internal/jit_utils.py
@@ -293,8 +293,8 @@ def _create_node(
     for _ in range(1, n_outputs):
         node.addOutput()
 
-    node_ouputs = tuple(node.outputs())  # type: ignore[possibly-undefined]
-    assert len(node_ouputs) == n_outputs
+    node_outputs = tuple(node.outputs())  # type: ignore[possibly-undefined]
+    assert len(node_outputs) == n_outputs
 
     aten = domain_op.startswith("aten::")
diff --git a/torch/testing/_internal/distributed/distributed_test.py b/torch/testing/_internal/distributed/distributed_test.py
index a53f8e7371c64..c0e4339e126cf 100644
--- a/torch/testing/_internal/distributed/distributed_test.py
+++ b/torch/testing/_internal/distributed/distributed_test.py
@@ -3622,7 +3622,7 @@ def _test_all_gather_coalesced_helper(
             ]
             assert self._run_all_gather_coalesced_and_verify(
                 output_tensor_lists, input_tensors, expected_tensors, group_id
-            ), "output tensors do not match expected ouputs"
+            ), "output tensors do not match expected outputs"
 
             self._barrier()