Fix typo under torch/_functorch directory (pytorch#111067)
This PR fixes typos in the comments and exception messages of files under the `torch/_functorch` directory.

Pull Request resolved: pytorch#111067
Approved by: https://github.com/Skylion007
kiszk authored and pytorchmergebot committed Oct 11, 2023
1 parent 4d29b40 commit 6d7744c
Showing 8 changed files with 25 additions and 25 deletions.
20 changes: 10 additions & 10 deletions torch/_functorch/aot_autograd.py
@@ -382,7 +382,7 @@ def posthook(grad_input, grad_output):
# x_view = generate_x_view(base)
# x_updated = x.mul(2)
# x_view_updated = x_updated.view(-1)
-# out = x_updated * x_view_udpated
+# out = x_updated * x_view_updated
# return x_updated, out
#
# # The calling convention change from (aliases) -> (base) happens
@@ -664,7 +664,7 @@ def __post_init__(self):
# Our forward() returns both (mutated_inputs, outputs, output_intermediate_bases, saved_tensors, saved_symints)
self.num_forward_returns = self.num_mutated_inputs + self.num_outputs + self.num_intermediate_bases
# In case of functionalization of rng ops, the fw_module returns one
-# additinal output for rng offset. This rng offset is used right
+# additional output for rng offset. This rng offset is used right
# away to advance the rng state, and is not passed on to the raw
# outputs. However, we need to know the exact boundary to identify
# which tensors to be saved for the bwd graph. num_forward captures
@@ -1053,7 +1053,7 @@ def inner(*flat_args):
requires_grad=isinstance(f_arg, torch.Tensor) and f_arg.requires_grad
))

-# If a function involves creating a tensor, and returning a view of it, such that its _base is the intermediiate,
+# If a function involves creating a tensor, and returning a view of it, such that its _base is the intermediate,
# We need to make sure our graph returns the _base as a graph output, and we manually recreate the view
# to return to the user. Why? The backend compiler is free to (incorrectly) not set requires_grad
# on the base tensor, but we are obligated to properly set requires-gradness on the real output.
@@ -1173,7 +1173,7 @@ def inner(*flat_args):
and not o.requires_grad
):
assert len(outs_with_identical_metadata_that_require_grad) > 0
-# In theory we could use any of these tensors to regenerat the aliased outputs from,
+# In theory we could use any of these tensors to regenerate the aliased outputs from,
# since they all alias each other and have identical metatadata
out_alias = outs_with_identical_metadata_that_require_grad[0]
existing_out_idx = out_tensor_ids[id(out_alias)]
@@ -2222,7 +2222,7 @@ def merge_view_inputs(
# to have incorrect sizes.
example_idx = aliased_input_indices[0]
example_alias = fwd_inputs[example_idx]
-# Note that this function is re-used at both trace time and rutnime.
+# Note that this function is re-used at both trace time and runtime.
# At trace time, we're under a FakeMode so synthetic_base becomes a FakeTensor.
synthetic_base = torch.empty((0,), dtype=example_alias.dtype, device=example_alias.device)
# We don't actually have a convenient way of going from storage -> tensor,
@@ -3188,7 +3188,7 @@ def wrap_tensor_subclasses(

# Note: [Partitioner handling for Subclasses, Part 2]
# At the beginning of AOTAutograd, we collect metadata on the inputs and outputs of the user fw,
-# to figure out which inputs/outputs are subclasses, and how to recontruct the subclasses after flattening them.
+# to figure out which inputs/outputs are subclasses, and how to reconstruct the subclasses after flattening them.
#
# When this function is called at runtime in the forward,
# we have been passed a list of (flattened) dense-tensor fw-outs, and need to reconstruct any subclass fw outs.
@@ -3459,7 +3459,7 @@ def aot_dispatch_autograd_graph(flat_fn, flat_args: List[Any], aot_config: AOTCo
# There should be *NO* mutating ops in the graph at this point.
assert_functional_graph(fx_g.graph)

-# Redudant with the check above, but worth having in case tracing introduced
+# Redundant with the check above, but worth having in case tracing introduced
# a fake tensor. Unlikely.
# See Note: [Fake Modules and AOTAutograd]
torch._dynamo.utils.assert_no_fake_params_or_buffers(fx_g)
@@ -4288,7 +4288,7 @@ def unflatten(self, x):


def create_functional_call(mod, params_spec, params_len):
-# Redudant with dynamo, but worth having in case this gets invoked elsewhere.
+# Redundant with dynamo, but worth having in case this gets invoked elsewhere.
# https://github.com/pytorch/pytorch/issues/103569

def functional_call(*args, **kwargs):
@@ -4308,7 +4308,7 @@ def functional_call(*args, **kwargs):
if not isinstance(out, (tuple, list)):
raise RuntimeError(
"Graph output must be a tuple(). This is so that we can avoid "
-"pytree processing of the ouputs. Please change the module to "
+"pytree processing of the outputs. Please change the module to "
"have tuple outputs or use aot_module instead."
)
return out
@@ -4906,7 +4906,7 @@ def aot_export_joint_simple(
This function makes a high-level "no calling convention changes" guarantee:
- If no inputs require grad (so we export an inference graph),
there are *no* calling convention change between the exported graph, and "func".
-- If at least one input requires grad (so we trace out and expot a joint fw-bw graph),
+- If at least one input requires grad (so we trace out and export a joint fw-bw graph),
Then if you were partition the graph into a separate forward and backward graph,
The forward graph will have no calling convention changes compared to "func".
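For orientation, the hunks above all live in AOTAutograd's tracing pipeline. A minimal sketch of exercising that pipeline through the public `functorch.compile.aot_function` entry point (the toy function and the printing compiler are illustrative assumptions, not part of this commit):

```python
import torch
from functorch.compile import aot_function

def fn(x):
    return x.sin().sum()

def print_compiler(fx_module, example_inputs):
    # Inspect the forward/backward graphs that aot_autograd.py traces out.
    fx_module.graph.print_tabular()
    return fx_module  # a GraphModule is callable, so it can stand in for the compiled fn

compiled_fn = aot_function(fn, fw_compiler=print_compiler)
compiled_fn(torch.randn(4, requires_grad=True)).backward()
```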
2 changes: 1 addition & 1 deletion torch/_functorch/autograd_function.py
@@ -211,7 +211,7 @@ def wrap_outputs_maintaining_identity(
# that will eventually be fixed by mode-only functorch.
# The TL;DR is that there's no way to unwrap a dead GradTensorWrapper,
# so we (the framework) need to do it manually. Regular PyTorch operators
-# automatically do so this is consisent.
+# automatically do so this is consistent.
#
# class MyExp(torch.autograd.Function):
# @staticmethod
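The comment above sketches a custom `MyExp` Function; a completed version in the modern `setup_context` style that functorch transforms expect might look as follows (an illustrative assumption, not code from this commit):

```python
import torch

class MyExp(torch.autograd.Function):
    generate_vmap_rule = True  # let functorch derive a vmap rule automatically

    @staticmethod
    def forward(x):
        return x.exp()

    @staticmethod
    def setup_context(ctx, inputs, output):
        ctx.save_for_backward(output)

    @staticmethod
    def backward(ctx, grad_output):
        (result,) = ctx.saved_tensors
        return grad_output * result

# Usable under torch.func transforms, which is where the wrapping logic above runs.
g = torch.func.grad(lambda x: MyExp.apply(x).sum())(torch.randn(3))
```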
8 changes: 4 additions & 4 deletions torch/_functorch/benchmark_utils.py
@@ -114,15 +114,15 @@ def is_mm_conv_event(event):
def compute_utilization(filename: str, total_length: float):
"""
Process the chrome traces outputs by the pytorch profiler to compute GPU Utilization
-and percent of times spent on matmal and convolution
+and percent of times spent on matmul and convolution
Args:
filename(str): Name of chrome traces file produced by pytorch profiler
total_length(float): total length of the process without profiler in second
Return:
-tuple: (GPU Utilization, percent of time spent on matmal and convolution)
+tuple: (GPU Utilization, percent of time spent on matmul and convolution)
"""
events = get_chrome_trace_events(filename)

@@ -147,7 +147,7 @@ def compute_utilization(filename: str, total_length: float):

def benchmark_utilization(f, input, trace_folder, optimize_ctx=None, trace_file_name="tmp_chrome_trace", num_runs=1):
"""
-Benchmark the GPU Utilization and percent of time spent on matmal and convolution operations of
+Benchmark the GPU Utilization and percent of time spent on matmul and convolution operations of
running f(input, **kwargs_for_f) with [optimize_ctx] [num_runs] times.
It will produce a chrome trace file in trace_folder/trace_file_name.json
@@ -174,7 +174,7 @@ def f(a):
num_runs: number of times to run f, excluding the warm-up runs, default to 1.
Return:
-tuple: (GPU Utilization, percent of time spent on matmal and convolution)
+tuple: (GPU Utilization, percent of time spent on matmul and convolution)
"""
isExist = os.path.exists(trace_folder)
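A hedged sketch of calling the helper whose docstring is fixed above; the workload, tensor size, and trace folder are assumptions, while the signature follows the docstring in the diff:

```python
import torch
from torch._functorch.benchmark_utils import benchmark_utilization

def f(a):
    return a.cos().mm(a)  # toy workload with at least one matmul

inp = torch.randn(1024, 1024, device="cuda")
# Profiles f, writes trace_folder/tmp_chrome_trace.json, then parses the chrome trace.
gpu_util, mm_conv_pct = benchmark_utilization(f, inp, trace_folder="/tmp/traces")
print(f"GPU utilization: {gpu_util}, matmul/conv share: {mm_conv_pct}")
```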
2 changes: 1 addition & 1 deletion torch/_functorch/compile_utils.py
@@ -25,7 +25,7 @@ def fx_graph_cse(fx_g: torch.fx.graph.Graph):
hash_env = {} # map from hash to a node in the new graph
token_map = {} # map from hash to token
for n in fx_g.nodes:
-# The placeholder, output, and get_attr nodes are copied to the new grpah without change
+# The placeholder, output, and get_attr nodes are copied to the new graph without change
# do not CSE away random operations
if n.op == 'placeholder' or n.op == 'output' or n.op == 'get_attr' or get_aten_target(n) in rand_ops:
new_node = new_graph.node_copy(n, lambda x: env[x])
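A minimal sketch of the CSE pass touched above (the traced function is an assumption):

```python
import torch.fx as fx
from torch._functorch.compile_utils import fx_graph_cse

def f(x):
    return x.cos() + x.cos()  # duplicate subexpression for CSE to merge

gm = fx.symbolic_trace(f)
new_graph = fx_graph_cse(gm.graph)   # placeholder/output/get_attr and random ops are copied as-is
new_gm = fx.GraphModule(gm, new_graph)
print(new_gm.code)                   # x.cos() now appears only once
```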
6 changes: 3 additions & 3 deletions torch/_functorch/eager_transforms.py
@@ -1360,7 +1360,7 @@ def functionalize(func: Callable, *, remove: str = 'mutations') -> Callable:
Returns:
Returns a new "functionalized" function. It takes the same inputs as
``func``, and has the same behavior, but any mutations
-(and optionally aliasing) performed on intermeidate tensors
+(and optionally aliasing) performed on intermediate tensors
in the function will be removed.
functionalize will also remove mutations (and views) that were performed on function inputs.
@@ -1470,7 +1470,7 @@ def forward(self, a_1):
Finally, a helpful mental model for understanding functionalization is that
-most user pytorch programs are writting with the public torch API.
+most user pytorch programs are writing with the public torch API.
When executed, torch operators are generally decomposed into
our internal C++ "ATen" API.
The logic for functionalization happens entirely at the level of ATen.
@@ -1550,7 +1550,7 @@ def linearize(func: Callable, *primals) -> Tuple[Any, Callable]:
``func`` evaluated at ``primals``.
linearize is useful if jvp is to be computed multiple times at ``primals``. However,
-to achieve this, linearize saves intermediate computation and has higher memory requrements
+to achieve this, linearize saves intermediate computation and has higher memory requirements
than directly applying `jvp`. So, if all the ``tangents`` are known, it maybe more efficient
to compute vmap(jvp) instead of using linearize.
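For orientation, minimal sketches of the two transforms whose docstrings are touched above, through their public `torch.func` entry points (the toy functions are assumptions):

```python
import torch
from torch.func import functionalize, linearize

def f(x):
    y = x.clone()
    y.add_(1)        # in-place mutation on an intermediate tensor
    return y

# functionalize removes the mutation from the traced program while preserving semantics.
out = functionalize(f)(torch.randn(3))

# linearize evaluates the function once and returns a reusable jvp function, trading the
# extra memory mentioned above for cheaper repeated jvp calls at the same primal.
output, jvp_fn = linearize(torch.sin, torch.randn(3))
tangent_out = jvp_fn(torch.ones(3))
```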
6 changes: 3 additions & 3 deletions torch/_functorch/functional_call.py
@@ -55,7 +55,7 @@ def functional_call(
>>> new_a = {'foo': torch.zeros(()), 'foo_tied': torch.zeros(())}
>>> functional_call(mod, new_a, torch.zeros()) # tensor(0.)
-An example of passing mutliple dictionaries
+An example of passing multiple dictionaries
.. code-block:: python
@@ -108,8 +108,8 @@ def compute_loss(params, x, t):
args (Any or tuple): arguments to be passed to the module call. If not a tuple, considered a single argument.
kwargs (dict): keyword arguments to be passed to the module call
tie_weights (bool, optional): If True, then parameters and buffers tied in the original model will be treated as
-tied in the reparamaterized version. Therefore, if True and different values are passed for the tied
-paramaters and buffers, it will error. If False, it will not respect the originally tied parameters and
+tied in the reparameterized version. Therefore, if True and different values are passed for the tied
+parameters and buffers, it will error. If False, it will not respect the originally tied parameters and
buffers unless the values passed for both weights are the same. Default: True.
strict (bool, optional): If True, then the parameters and buffers passed in must match the parameters and
buffers in the original module. Therefore, if True and there are any missing or unexpected keys, it will
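A hedged sketch of the "multiple dictionaries" usage mentioned above, via the public `torch.func.functional_call` (the module and values are assumptions):

```python
import torch
import torch.nn as nn
from torch.func import functional_call

mod = nn.Linear(3, 3)
params = {name: torch.zeros_like(p) for name, p in mod.named_parameters()}
buffers = dict(mod.named_buffers())

# Several dictionaries may be passed; they are merged before the stateless call.
out = functional_call(mod, (params, buffers), (torch.randn(1, 3),))
```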
4 changes: 2 additions & 2 deletions torch/_functorch/partitioners.py
@@ -624,7 +624,7 @@ def min_cut_rematerialization_partition(
To create the fwd and bwd graph, we copy the joint graph, manually set the
outputs to just original forward or backward outputs. And then we run the
-resulting graphs through dead code elimintation.
+resulting graphs through dead code elimination.
.. warning::
This API is experimental and likely to change.
@@ -845,7 +845,7 @@ def get_node_weight(node) -> int:
if ban_recomputation(node) and node in required_fw_nodes:
nx_graph.add_edge("source", node.name + "_in", capacity=math.inf)

-# Checks if a node is actually a tuple. Can be simplified to just an isisinstance check if we always use faketensors.
+# Checks if a node is actually a tuple. Can be simplified to just an isinstance check if we always use faketensors.
is_non_tensor_node = (('val' not in node.meta and 'tensor_meta' not in node.meta) or
('val' in node.meta and not isinstance(node.meta['val'], torch.Tensor)))

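The partitioner above is normally plugged into AOTAutograd; a minimal sketch under the assumption of a no-op compiler:

```python
import torch
from functorch.compile import aot_function, min_cut_rematerialization_partition

def noop(fx_module, example_inputs):
    return fx_module  # compilation left as identity for illustration

def fn(x):
    return x.sin().cos().sum()

# The min-cut partitioner decides which intermediates to save for backward
# and which to recompute (rematerialize) instead.
compiled_fn = aot_function(
    fn,
    fw_compiler=noop,
    bw_compiler=noop,
    partition_fn=min_cut_rematerialization_partition,
)
compiled_fn(torch.randn(8, requires_grad=True)).backward()
```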
2 changes: 1 addition & 1 deletion torch/_functorch/vmap.py
@@ -365,7 +365,7 @@ def _chunked_vmap(func, flat_in_dims, chunks_flat_args, args_spec, out_dims, ran
return tree_unflatten(flat_output, arg_spec)


-# Vmap refactored helper funcions:
+# Vmap refactored helper functions:
def _check_randomness_arg(randomness):
if randomness not in ['error', 'different', 'same']:
raise RuntimeError(f"Only allowed values for randomness are 'error', 'different', or 'same'. Got {randomness}")
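A short sketch of the three `randomness` values validated by `_check_randomness_arg` above, using the public `torch.func.vmap`:

```python
import torch
from torch.func import vmap

def noisy(x):
    return x + torch.randn(())

different = vmap(noisy, randomness="different")  # fresh noise per batch element
same = vmap(noisy, randomness="same")            # one noise sample shared across the batch
out = different(torch.zeros(4))
# randomness="error" (the default) raises as soon as the torch.randn call is hit.
```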