Fix typo under torch/_functorch directory (pytorch#111067)
This PR fixes typos in the comments and exception messages of files under the `torch/_functorch` directory.

Pull Request resolved: pytorch#111067
Approved by: https://github.com/Skylion007
kiszk authored and pytorchmergebot committed Oct 11, 2023
1 parent 4d29b40 commit 6d7744c
Showing 8 changed files with 25 additions and 25 deletions.
20 changes: 10 additions & 10 deletions torch/_functorch/aot_autograd.py
@@ -382,7 +382,7 @@ def posthook(grad_input, grad_output):
# x_view = generate_x_view(base)
# x_updated = x.mul(2)
# x_view_updated = x_updated.view(-1)
-# out = x_updated * x_view_udpated
+# out = x_updated * x_view_updated
# return x_updated, out
#
# # The calling convention change from (aliases) -> (base) happens
@@ -664,7 +664,7 @@ def __post_init__(self):
# Our forward() returns both (mutated_inputs, outputs, output_intermediate_bases, saved_tensors, saved_symints)
self.num_forward_returns = self.num_mutated_inputs + self.num_outputs + self.num_intermediate_bases
# In case of functionalization of rng ops, the fw_module returns one
-# additinal output for rng offset. This rng offset is used right
+# additional output for rng offset. This rng offset is used right
# away to advance the rng state, and is not passed on to the raw
# outputs. However, we need to know the exact boundary to identify
# which tensors to be saved for the bwd graph. num_forward captures
@@ -1053,7 +1053,7 @@ def inner(*flat_args):
requires_grad=isinstance(f_arg, torch.Tensor) and f_arg.requires_grad
))

-# If a function involves creating a tensor, and returning a view of it, such that its _base is the intermediiate,
+# If a function involves creating a tensor, and returning a view of it, such that its _base is the intermediate,
# We need to make sure our graph returns the _base as a graph output, and we manually recreate the view
# to return to the user. Why? The backend compiler is free to (incorrectly) not set requires_grad
# on the base tensor, but we are obligated to properly set requires-gradness on the real output.
@@ -1173,7 +1173,7 @@ def inner(*flat_args):
and not o.requires_grad
):
assert len(outs_with_identical_metadata_that_require_grad) > 0
-# In theory we could use any of these tensors to regenerat the aliased outputs from,
+# In theory we could use any of these tensors to regenerate the aliased outputs from,
# since they all alias each other and have identical metatadata
out_alias = outs_with_identical_metadata_that_require_grad[0]
existing_out_idx = out_tensor_ids[id(out_alias)]
@@ -2222,7 +2222,7 @@ def merge_view_inputs(
# to have incorrect sizes.
example_idx = aliased_input_indices[0]
example_alias = fwd_inputs[example_idx]
-# Note that this function is re-used at both trace time and rutnime.
+# Note that this function is re-used at both trace time and runtime.
# At trace time, we're under a FakeMode so synthetic_base becomes a FakeTensor.
synthetic_base = torch.empty((0,), dtype=example_alias.dtype, device=example_alias.device)
# We don't actually have a convenient way of going from storage -> tensor,
@@ -3188,7 +3188,7 @@ def wrap_tensor_subclasses(

# Note: [Partitioner handling for Subclasses, Part 2]
# At the beginning of AOTAutograd, we collect metadata on the inputs and outputs of the user fw,
-# to figure out which inputs/outputs are subclasses, and how to recontruct the subclasses after flattening them.
+# to figure out which inputs/outputs are subclasses, and how to reconstruct the subclasses after flattening them.
#
# When this function is called at runtime in the forward,
# we have been passed a list of (flattened) dense-tensor fw-outs, and need to reconstruct any subclass fw outs.
@@ -3459,7 +3459,7 @@ def aot_dispatch_autograd_graph(flat_fn, flat_args: List[Any], aot_config: AOTCo
# There should be *NO* mutating ops in the graph at this point.
assert_functional_graph(fx_g.graph)

-# Redudant with the check above, but worth having in case tracing introduced
+# Redundant with the check above, but worth having in case tracing introduced
# a fake tensor. Unlikely.
# See Note: [Fake Modules and AOTAutograd]
torch._dynamo.utils.assert_no_fake_params_or_buffers(fx_g)
@@ -4288,7 +4288,7 @@ def unflatten(self, x):


def create_functional_call(mod, params_spec, params_len):
-# Redudant with dynamo, but worth having in case this gets invoked elsewhere.
+# Redundant with dynamo, but worth having in case this gets invoked elsewhere.
# https://github.com/pytorch/pytorch/issues/103569

def functional_call(*args, **kwargs):
@@ -4308,7 +4308,7 @@ def functional_call(*args, **kwargs):
if not isinstance(out, (tuple, list)):
raise RuntimeError(
"Graph output must be a tuple(). This is so that we can avoid "
-"pytree processing of the ouputs. Please change the module to "
+"pytree processing of the outputs. Please change the module to "
"have tuple outputs or use aot_module instead."
)
return out
@@ -4906,7 +4906,7 @@ def aot_export_joint_simple(
This function makes a high-level "no calling convention changes" guarantee:
- If no inputs require grad (so we export an inference graph),
there are *no* calling convention change between the exported graph, and "func".
-- If at least one input requires grad (so we trace out and expot a joint fw-bw graph),
+- If at least one input requires grad (so we trace out and export a joint fw-bw graph),
Then if you were partition the graph into a separate forward and backward graph,
The forward graph will have no calling convention changes compared to "func".
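For orientation, the hunks above all live in AOTAutograd's tracing pipeline. A minimal sketch of exercising that pipeline through the public `functorch.compile.aot_function` entry point (the toy function and the printing compiler are illustrative assumptions, not part of this commit):

```python
import torch
from functorch.compile import aot_function

def fn(x):
    return x.sin().sum()

def print_compiler(fx_module, example_inputs):
    # Inspect the forward/backward graphs that aot_autograd.py traces out.
    fx_module.graph.print_tabular()
    return fx_module  # a GraphModule is callable, so it can stand in for the compiled fn

compiled_fn = aot_function(fn, fw_compiler=print_compiler)
compiled_fn(torch.randn(4, requires_grad=True)).backward()
```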
2 changes: 1 addition & 1 deletion torch/_functorch/autograd_function.py
@@ -211,7 +211,7 @@ def wrap_outputs_maintaining_identity(
# that will eventually be fixed by mode-only functorch.
# The TL;DR is that there's no way to unwrap a dead GradTensorWrapper,
# so we (the framework) need to do it manually. Regular PyTorch operators
-# automatically do so this is consisent.
+# automatically do so this is consistent.
#
# class MyExp(torch.autograd.Function):
# @staticmethod
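The comment above sketches a custom `MyExp` Function; a completed version in the modern `setup_context` style that functorch transforms expect might look as follows (an illustrative assumption, not code from this commit):

```python
import torch

class MyExp(torch.autograd.Function):
    generate_vmap_rule = True  # let functorch derive a vmap rule automatically

    @staticmethod
    def forward(x):
        return x.exp()

    @staticmethod
    def setup_context(ctx, inputs, output):
        ctx.save_for_backward(output)

    @staticmethod
    def backward(ctx, grad_output):
        (result,) = ctx.saved_tensors
        return grad_output * result

# Usable under torch.func transforms, which is where the wrapping logic above runs.
g = torch.func.grad(lambda x: MyExp.apply(x).sum())(torch.randn(3))
```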
8 changes: 4 additions & 4 deletions torch/_functorch/benchmark_utils.py
@@ -114,15 +114,15 @@ def is_mm_conv_event(event):
def compute_utilization(filename: str, total_length: float):
"""
Process the chrome traces outputs by the pytorch profiler to compute GPU Utilization
-and percent of times spent on matmal and convolution
+and percent of times spent on matmul and convolution
Args:
filename(str): Name of chrome traces file produced by pytorch profiler
total_length(float): total length of the process without profiler in second
Return:
-tuple: (GPU Utilization, percent of time spent on matmal and convolution)
+tuple: (GPU Utilization, percent of time spent on matmul and convolution)
"""
events = get_chrome_trace_events(filename)

@@ -147,7 +147,7 @@ def compute_utilization(filename: str, total_length: float):

def benchmark_utilization(f, input, trace_folder, optimize_ctx=None, trace_file_name="tmp_chrome_trace", num_runs=1):
"""
-Benchmark the GPU Utilization and percent of time spent on matmal and convolution operations of
+Benchmark the GPU Utilization and percent of time spent on matmul and convolution operations of
running f(input, **kwargs_for_f) with [optimize_ctx] [num_runs] times.
It will produce a chrome trace file in trace_folder/trace_file_name.json
@@ -174,7 +174,7 @@ def f(a):
num_runs: number of times to run f, excluding the warm-up runs, default to 1.
Return:
-tuple: (GPU Utilization, percent of time spent on matmal and convolution)
+tuple: (GPU Utilization, percent of time spent on matmul and convolution)
"""
isExist = os.path.exists(trace_folder)
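A hedged sketch of calling the helper whose docstring is fixed above; the workload, tensor size, and trace folder are assumptions, while the signature follows the docstring in the diff:

```python
import torch
from torch._functorch.benchmark_utils import benchmark_utilization

def f(a):
    return a.cos().mm(a)  # toy workload with at least one matmul

inp = torch.randn(1024, 1024, device="cuda")
# Profiles f, writes trace_folder/tmp_chrome_trace.json, then parses the chrome trace.
gpu_util, mm_conv_pct = benchmark_utilization(f, inp, trace_folder="/tmp/traces")
print(f"GPU utilization: {gpu_util}, matmul/conv share: {mm_conv_pct}")
```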
2 changes: 1 addition & 1 deletion torch/_functorch/compile_utils.py
@@ -25,7 +25,7 @@ def fx_graph_cse(fx_g: torch.fx.graph.Graph):
hash_env = {} # map from hash to a node in the new graph
token_map = {} # map from hash to token
for n in fx_g.nodes:
-# The placeholder, output, and get_attr nodes are copied to the new grpah without change
+# The placeholder, output, and get_attr nodes are copied to the new graph without change
# do not CSE away random operations
if n.op == 'placeholder' or n.op == 'output' or n.op == 'get_attr' or get_aten_target(n) in rand_ops:
new_node = new_graph.node_copy(n, lambda x: env[x])
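A minimal sketch of the CSE pass touched above (the traced function is an assumption):

```python
import torch.fx as fx
from torch._functorch.compile_utils import fx_graph_cse

def f(x):
    return x.cos() + x.cos()  # duplicate subexpression for CSE to merge

gm = fx.symbolic_trace(f)
new_graph = fx_graph_cse(gm.graph)   # placeholder/output/get_attr and random ops are copied as-is
new_gm = fx.GraphModule(gm, new_graph)
print(new_gm.code)                   # x.cos() now appears only once
```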
6 changes: 3 additions & 3 deletions torch/_functorch/eager_transforms.py
@@ -1360,7 +1360,7 @@ def functionalize(func: Callable, *, remove: str = 'mutations') -> Callable:
Returns:
Returns a new "functionalized" function. It takes the same inputs as
``func``, and has the same behavior, but any mutations
-(and optionally aliasing) performed on intermeidate tensors
+(and optionally aliasing) performed on intermediate tensors
in the function will be removed.
functionalize will also remove mutations (and views) that were performed on function inputs.
@@ -1470,7 +1470,7 @@ def forward(self, a_1):
Finally, a helpful mental model for understanding functionalization is that
-most user pytorch programs are writting with the public torch API.
+most user pytorch programs are writing with the public torch API.
When executed, torch operators are generally decomposed into
our internal C++ "ATen" API.
The logic for functionalization happens entirely at the level of ATen.
@@ -1550,7 +1550,7 @@ def linearize(func: Callable, *primals) -> Tuple[Any, Callable]:
``func`` evaluated at ``primals``.
linearize is useful if jvp is to be computed multiple times at ``primals``. However,
-to achieve this, linearize saves intermediate computation and has higher memory requrements
+to achieve this, linearize saves intermediate computation and has higher memory requirements
than directly applying `jvp`. So, if all the ``tangents`` are known, it maybe more efficient
to compute vmap(jvp) instead of using linearize.
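For orientation, minimal sketches of the two transforms whose docstrings are touched above, through their public `torch.func` entry points (the toy functions are assumptions):

```python
import torch
from torch.func import functionalize, linearize

def f(x):
    y = x.clone()
    y.add_(1)        # in-place mutation on an intermediate tensor
    return y

# functionalize removes the mutation from the traced program while preserving semantics.
out = functionalize(f)(torch.randn(3))

# linearize evaluates the function once and returns a reusable jvp function, trading the
# extra memory mentioned above for cheaper repeated jvp calls at the same primal.
output, jvp_fn = linearize(torch.sin, torch.randn(3))
tangent_out = jvp_fn(torch.ones(3))
```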
6 changes: 3 additions & 3 deletions torch/_functorch/functional_call.py
@@ -55,7 +55,7 @@ def functional_call(
>>> new_a = {'foo': torch.zeros(()), 'foo_tied': torch.zeros(())}
>>> functional_call(mod, new_a, torch.zeros()) # tensor(0.)
-An example of passing mutliple dictionaries
+An example of passing multiple dictionaries
.. code-block:: python
@@ -108,8 +108,8 @@ def compute_loss(params, x, t):
args (Any or tuple): arguments to be passed to the module call. If not a tuple, considered a single argument.
kwargs (dict): keyword arguments to be passed to the module call
tie_weights (bool, optional): If True, then parameters and buffers tied in the original model will be treated as
-tied in the reparamaterized version. Therefore, if True and different values are passed for the tied
-paramaters and buffers, it will error. If False, it will not respect the originally tied parameters and
+tied in the reparameterized version. Therefore, if True and different values are passed for the tied
+parameters and buffers, it will error. If False, it will not respect the originally tied parameters and
buffers unless the values passed for both weights are the same. Default: True.
strict (bool, optional): If True, then the parameters and buffers passed in must match the parameters and
buffers in the original module. Therefore, if True and there are any missing or unexpected keys, it will
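A hedged sketch of the "multiple dictionaries" usage mentioned above, via the public `torch.func.functional_call` (the module and values are assumptions):

```python
import torch
import torch.nn as nn
from torch.func import functional_call

mod = nn.Linear(3, 3)
params = {name: torch.zeros_like(p) for name, p in mod.named_parameters()}
buffers = dict(mod.named_buffers())

# Several dictionaries may be passed; they are merged before the stateless call.
out = functional_call(mod, (params, buffers), (torch.randn(1, 3),))
```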
4 changes: 2 additions & 2 deletions torch/_functorch/partitioners.py
@@ -624,7 +624,7 @@ def min_cut_rematerialization_partition(
To create the fwd and bwd graph, we copy the joint graph, manually set the
outputs to just original forward or backward outputs. And then we run the
-resulting graphs through dead code elimintation.
+resulting graphs through dead code elimination.
.. warning::
This API is experimental and likely to change.
@@ -845,7 +845,7 @@ def get_node_weight(node) -> int:
if ban_recomputation(node) and node in required_fw_nodes:
nx_graph.add_edge("source", node.name + "_in", capacity=math.inf)

-# Checks if a node is actually a tuple. Can be simplified to just an isisinstance check if we always use faketensors.
+# Checks if a node is actually a tuple. Can be simplified to just an isinstance check if we always use faketensors.
is_non_tensor_node = (('val' not in node.meta and 'tensor_meta' not in node.meta) or
('val' in node.meta and not isinstance(node.meta['val'], torch.Tensor)))

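The partitioner above is normally plugged into AOTAutograd; a minimal sketch under the assumption of a no-op compiler:

```python
import torch
from functorch.compile import aot_function, min_cut_rematerialization_partition

def noop(fx_module, example_inputs):
    return fx_module  # compilation left as identity for illustration

def fn(x):
    return x.sin().cos().sum()

# The min-cut partitioner decides which intermediates to save for backward
# and which to recompute (rematerialize) instead.
compiled_fn = aot_function(
    fn,
    fw_compiler=noop,
    bw_compiler=noop,
    partition_fn=min_cut_rematerialization_partition,
)
compiled_fn(torch.randn(8, requires_grad=True)).backward()
```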
2 changes: 1 addition & 1 deletion torch/_functorch/vmap.py
@@ -365,7 +365,7 @@ def _chunked_vmap(func, flat_in_dims, chunks_flat_args, args_spec, out_dims, ran
return tree_unflatten(flat_output, arg_spec)


-# Vmap refactored helper funcions:
+# Vmap refactored helper functions:
def _check_randomness_arg(randomness):
if randomness not in ['error', 'different', 'same']:
raise RuntimeError(f"Only allowed values for randomness are 'error', 'different', or 'same'. Got {randomness}")
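A short sketch of the three `randomness` values validated by `_check_randomness_arg` above, using the public `torch.func.vmap`:

```python
import torch
from torch.func import vmap

def noisy(x):
    return x + torch.randn(())

different = vmap(noisy, randomness="different")  # fresh noise per batch element
same = vmap(noisy, randomness="same")            # one noise sample shared across the batch
out = different(torch.zeros(4))
# randomness="error" (the default) raises as soon as the torch.randn call is hit.
```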