Fix typo under torch/_inductor directory (pytorch#110530)
This PR fixes typos in comments and messages in files under the `torch/_inductor` directory.

Pull Request resolved: pytorch#110530
Approved by: https://github.com/kit1980
kiszk authored and pytorchmergebot committed Oct 5, 2023
1 parent 9648df1 commit 434a996
Showing 18 changed files with 31 additions and 31 deletions.
2 changes: 1 addition & 1 deletion torch/_inductor/autotune_process.py
@@ -313,7 +313,7 @@ def benchmark(

results = {}

- # Use a ThreadExecutorPool to spread the work across the subproccesses and
+ # Use a ThreadExecutorPool to spread the work across the subprocesses and
# to grab subprocesses as soon as they're free.
for choice, result in zip(choices, self.executor.map(self.target, choices)):
results[choice] = result
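The hunk above fans benchmark work out over a pool of worker subprocesses via `executor.map` and pairs results back up with `zip`. A minimal, self-contained sketch of that pattern, assuming a plain `ProcessPoolExecutor` and a made-up per-choice workload (not the actual inductor benchmarking code):

from concurrent.futures import ProcessPoolExecutor

def benchmark_choice(choice):
    # Stand-in for compiling and timing one autotuning candidate.
    return sum(i * i for i in range(choice * 100_000))

def benchmark_all(choices):
    results = {}
    with ProcessPoolExecutor(max_workers=4) as executor:
        # map() hands work to workers as they free up and yields results in
        # input order, so zip() pairs each choice with its own result.
        for choice, result in zip(choices, executor.map(benchmark_choice, choices)):
            results[choice] = result
    return results

if __name__ == "__main__":
    print(benchmark_all([1, 2, 3, 4]))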
2 changes: 1 addition & 1 deletion torch/_inductor/codegen/cuda/gemm_template.py
@@ -21,7 +21,7 @@
{{template.globals().getvalue()}}
{{instance_definition}}
// When workspace_size is not a nullptr, populates requested workspace_size and returns.
- // Otherwise, compuates the Gemm kernel using the given workspace ptr.
+ // Otherwise, computes the Gemm kernel using the given workspace ptr.
extern "C" {
{{kernel.def_kernel(inputs=[X, W, Bias], outputs=[Y], names_str="X, W, Bias, Y", input_reorder=input_reorder)}} {
try {
8 changes: 4 additions & 4 deletions torch/_inductor/codegen/triton.py
@@ -1271,7 +1271,7 @@ def __call__(self):
return None
elif assert_min and assert_max:
# The conditions need to be in parens because of Python's operator precedence.
- # It'd be less error-prone to use and/or/not, which is suported by triton
+ # It'd be less error-prone to use and/or/not, which is supported by triton
cond = f"(0 <= {self.var}) & ({self.var} < {size_str})"
cond_print = f"0 <= {self.var} < {size_str}"
elif assert_min:
@@ -1918,7 +1918,7 @@ def codegen_kernel(self, name=None):
for numel in self.numels:
numel_hint = V.graph.sizevars.symbolic_hint(numel)
if not isinstance(numel_hint, (int, sympy.Integer)):
- # This default heuristic hint was picked carefuly: it is
+ # This default heuristic hint was picked carefully: it is
# large, to ensure that we don't shrink the block size (since
# if you don't have many elements, it'd be wasteful to pick a
# large block size). Since we don't know how many elements we
@@ -2184,7 +2184,7 @@ def warn_mix_layout(self, kernel_name):
for arg_name in call_args:
buf = V.graph.get_buffer(arg_name)
if buf and len(buf.layout.size) == 4:
- # ignore the tensor if only 1 dimention is non-zero
+ # ignore the tensor if only 1 dimension is non-zero
if len([x for x in buf.layout.size if x == 1]) == 3:
continue
stride_order = ir.get_stride_order(buf.layout.stride)
@@ -2519,7 +2519,7 @@ def codegen_comment(self, node_schedule):
if not any(
isinstance(n, ForeachKernelSchedulerNode) for n in node_schedule
):
- # We probablly should look what are the nodes inside a foreach
+ # We probably should look what are the nodes inside a foreach
# schedule node
node_names = [
n.get_name()
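The first triton.py hunk above notes that the generated bounds check needs parentheses because of Python's operator precedence. A small illustration of the pitfall using plain tensors (this is an explanatory example, not generated Triton code):

import torch

idx = torch.tensor([0, 3, 9])
size = 5

# Correct: compare first, then combine the boolean masks with &.
mask = (0 <= idx) & (idx < size)
print(mask)  # tensor([ True,  True, False])

# Without parentheses, & binds tighter than the comparisons, so the line below
# parses as 0 <= (idx & idx) < size, a chained comparison on tensors that
# raises "Boolean value of Tensor with more than one element is ambiguous".
# mask = 0 <= idx & idx < size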
2 changes: 1 addition & 1 deletion torch/_inductor/codegen/wrapper.py
@@ -1116,7 +1116,7 @@ def write_wrapper_decl(self):
f"""std::vector<at::Tensor> {self.call_func_name}(const std::vector<at::Tensor>& inputs) {{"""
)
with self.prefix.indent():
- # assign inputs and outpus in both cases so the later codegen can be simplified
+ # assign inputs and outputs in both cases so the later codegen can be simplified
if V.graph.aot_mode:
if config.aot_inductor.abi_compatible:
self.prefix.splice(
8 changes: 4 additions & 4 deletions torch/_inductor/compile_fx.py
@@ -297,7 +297,7 @@ def compile_fx_inner(
"""
Inductor API that compiles a single graph.
- If you change the argument list for this funtion, make sure you
+ If you change the argument list for this function, make sure you
also update the call to save_args_for_compile_fx_inner below accordingly.
"""
if dynamo_utils.count_calls(gm.graph) == 0:
@@ -361,7 +361,7 @@ def compile_fx_inner(

# doesnt work for non-trees because the warmup run would apply mutation twice
if config.triton.cudagraph_trees:
- # checking if mutation is only on paramameters/static inputs
+ # checking if mutation is only on parameters/static inputs
has_mutation = not all(
idx < num_fixed for idx in compiled_graph.mutated_input_idxs
)
@@ -1076,9 +1076,9 @@ def fw_compiler_base(
# For training
# len(orig_model_outputs) <= len(model_outputs)
# During training, most of the time the model_outputs starts with
- # orignal module's outputs followed by saved activations.
+ # original module's outputs followed by saved activations.
# But this can be not true if the model have inplace updated tensors.
- # AOTAutograd will make those tensors being returned before the orignal
+ # AOTAutograd will make those tensors being returned before the original
# module's output.
# To make things safe, we'll use original_output_start_index field
# set by AOTAutograd to decide where the original module outputs start.
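To make the fw_compiler_base hunk above concrete: during training the flat forward-graph outputs contain the original module outputs plus saved activations, with in-place-updated tensors possibly moved to the front, and original_output_start_index marks where the real outputs begin. A hedged sketch of slicing on that index (names and layout here are illustrative, not the exact compile_fx code):

def split_fw_outputs(model_outputs, num_orig_outputs, original_output_start_index):
    # Tensors returned early because they were updated in place.
    mutated = model_outputs[:original_output_start_index]
    # The user-visible outputs of the original module.
    end = original_output_start_index + num_orig_outputs
    orig_outputs = model_outputs[original_output_start_index:end]
    # Everything after that is activations saved for the backward pass.
    saved_activations = model_outputs[end:]
    return mutated, orig_outputs, saved_activations

# Example: 1 in-place-updated tensor, 2 real outputs, 3 saved activations.
outs = ["mut_in", "y1", "y2", "act1", "act2", "act3"]
print(split_fw_outputs(outs, num_orig_outputs=2, original_output_start_index=1))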
4 changes: 2 additions & 2 deletions torch/_inductor/config.py
@@ -48,7 +48,7 @@
# enable pattern match+replace optimizations
pattern_matcher = True

- # register custom graph optimizatin pass hook. so far, pre/post passes are
+ # register custom graph optimization pass hook. so far, pre/post passes are
# only applied before/after pattern_matcher in post_grad_passes.
#
# def my_custom_pre_pass(graph: torch.fx.graph.Graph):
@@ -423,7 +423,7 @@ class triton:
# the max number of spills we allow for the configs we benchmark.
# Setting this to 0 means we skip a config if it spills even a single
# register.
- # Settting it to a larger value allows a config spilling a small amount
+ # Setting it to a larger value allows a config spilling a small amount
# of registers being benchmarked.
#
# NOTE: triton will always report >0 register spills for kernels using sin/cos.
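The first config.py hunk above describes a hook for custom graph passes that run around the pattern matcher in post_grad_passes, and its example is truncated mid-definition. A sketch of what such a hook could look like; the post_grad_custom_pre_pass attribute name is an assumption filled in here for illustration, since the hunk cuts off before naming it:

import torch
from torch._inductor import config as inductor_config

def my_custom_pre_pass(graph: torch.fx.graph.Graph) -> None:
    # Mutate the FX graph in place; this toy pass only counts the nodes.
    print("custom pre-pass sees", len(list(graph.nodes)), "nodes")

# Assumed hook name; the truncated comment above does not show it.
inductor_config.post_grad_custom_pre_pass = my_custom_pre_pass

compiled = torch.compile(lambda x: (x + 1).relu())
compiled(torch.randn(8))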
6 changes: 3 additions & 3 deletions torch/_inductor/coordinate_descent_tuner.py
@@ -38,10 +38,10 @@ class CoordescTuner:
"""
The coordinate descent tuner. Tune one field/coordinate at a time.
- TODO will it be necessary to tune multiple fields simultanuously.
+ TODO will it be necessary to tune multiple fields simultaneously.
- TODO: what if both increasing and descreasing a field can improve perf.
+ TODO: what if both increasing and decreasing a field can improve perf.
i.e., there are multiple local optima..
"""

@@ -224,7 +224,7 @@ def compare_config(self, func, candidate_config, best_config, best_timing):
Check if candidate_config is better than best_config.
Return a touple of (compare_result, candidate_timing).
- compare_result is true iff condidate_config is better.
+ compare_result is true iff candidate_config is better.
"""
log.debug("Try config %s", candidate_config)
try:
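A toy version of the coordinate-descent idea in the CoordescTuner docstring above: tune one field at a time and keep a candidate only when it benchmarks faster. This is a generic illustration with a fake objective, not the real tuner API:

def coordinate_descent(benchmark, config, neighbors):
    # benchmark(config) -> runtime; neighbors(field, value) -> candidate values.
    best, best_time = dict(config), benchmark(config)
    improved = True
    while improved:
        improved = False
        for field in list(best):                # one coordinate at a time
            for value in neighbors(field, best[field]):
                candidate = {**best, field: value}
                timing = benchmark(candidate)
                if timing < best_time:          # candidate is strictly better
                    best, best_time, improved = candidate, timing, True
    return best, best_time

# Fake objective whose sweet spot is XBLOCK=64, num_warps=4.
bench = lambda c: abs(c["XBLOCK"] - 64) + 2 * abs(c["num_warps"] - 4)
print(coordinate_descent(bench, {"XBLOCK": 16, "num_warps": 1},
                         lambda f, v: [max(1, v // 2), v * 2]))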
2 changes: 1 addition & 1 deletion torch/_inductor/decomposition.py
@@ -247,7 +247,7 @@ def non_empty_tensor(x):
elif 1 < len(filtered_tensors) < len(tensors):
# on the first call, when we remove empty tensors, we redispatch recursively
return aten.cat.default(filtered_tensors, dim)
- # when no 'filtering' has occured, we raise to prevent infinite recursion (no more decomposition needed)
+ # when no 'filtering' has occurred, we raise to prevent infinite recursion (no more decomposition needed)
return NotImplemented


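For the cat-decomposition hunk above, here is a stripped-down sketch of the filter-then-redispatch pattern it follows (the emptiness check is simplified; the real decomposition reasons about sizes along the concat dimension and handles more cases):

import torch

aten = torch.ops.aten

def cat_decomposition(tensors, dim=0):
    filtered = [t for t in tensors if t.numel() != 0]   # simplified emptiness check
    if len(filtered) == 1:
        return filtered[0].clone()
    if 1 < len(filtered) < len(tensors):
        # Some empty tensors were dropped: redispatch so the shorter list is
        # decomposed again.
        return aten.cat.default(filtered, dim)
    # Nothing was filtered out: returning NotImplemented stops the recursion
    # and falls back to the regular aten.cat kernel.
    return NotImplemented

x, e, y = torch.ones(2), torch.empty(0), torch.zeros(3)
print(cat_decomposition([x, e, y]))   # redispatches with just [x, y]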
2 changes: 1 addition & 1 deletion torch/_inductor/fx_passes/quantization.py
@@ -642,7 +642,7 @@ def _register_dequant_promotion_pass(pattern, pass_number):
)
def dequant_promotion(match: Match, *args, **kwargs):
# If dequant pattern used by multiply nodes,
- # we will do dequant promotion. So each user node has a seperate dequant pattern connected.
+ # we will do dequant promotion. So each user node has a separate dequant pattern connected.
def clone_to_new_node(graph, source_node, user_node):
assert (
source_node.op == "call_function"
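The dequant-promotion hunk above gives every user of a shared dequant pattern its own copy of that pattern. A generic torch.fx sketch of that clone-per-user move, with a hypothetical helper name (it is not the actual clone_to_new_node):

import operator

import torch
import torch.fx

def promote_node_per_user(graph: torch.fx.Graph, node: torch.fx.Node) -> None:
    # Give every user after the first its own clone of `node`.
    users = list(node.users)
    for user in users[1:]:
        with graph.inserting_before(user):
            clone = graph.node_copy(node)       # same op, same args
        user.replace_input_with(node, clone)    # rewire this user onto the clone

# Tiny demo: y = x * 2 feeds two adds; after promotion each add gets its own multiply.
def f(x):
    y = x * 2
    return y + 1, y + 2

gm = torch.fx.symbolic_trace(f)
mul = next(n for n in gm.graph.nodes if n.target is operator.mul)
promote_node_per_user(gm.graph, mul)
gm.recompile()
print(gm.code)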
4 changes: 2 additions & 2 deletions torch/_inductor/graph.py
@@ -346,7 +346,7 @@ def decide_layout_opt(gm) -> bool:
#
# We disable layout optimization if a model contains aten._scaled_dot_product_flash_attention.
#
- # An alternative is to do necessary layout convertion to make sure aten._scaled_dot_product_flash_attention's
+ # An alternative is to do necessary layout conversion to make sure aten._scaled_dot_product_flash_attention's
# inputs have the layout needed. But that seems to have worse perf than disabing the layout opt.
# TODO(shunting) revisit if we can still apply layout optimization to models containing sdpa while
# bringing perf gains.
@@ -909,7 +909,7 @@ def init_wrapper_code(self):

device_types = self.device_types.copy()
# In terms of some operations that don't have input tensors, we need to
- # check the deivce of the buffers.
+ # check the device of the buffers.
for buffer in self.buffers:
device_types.add(buffer.get_device().type)
device_types.discard("cpu")
4 changes: 2 additions & 2 deletions torch/_inductor/ir.py
@@ -5918,7 +5918,7 @@ class InPlaceHint(ExternKernel):
Wrap the input of your inplace op to enable this behavior.
The design is based on two key decisions:
- - this node is resposible for allocating the in/out buffer used by the collective.
+ - this node is responsible for allocating the in/out buffer used by the collective.
This is controlled by the ``should_allocate`` method that returns True here and
False for the collective node
- The scheduler special-case this node and enable it to reuse its input.
@@ -5958,7 +5958,7 @@ def codegen(self, wrapper):
class MultiOutputNoSizeAssert(MultiOutput):
"""
Extract partial output from a multi-output OP.
- Works like MultiOutput but doesn't assert size. This must be a property guaranteed by the op emiting this.
+ Works like MultiOutput but doesn't assert size. This must be a property guaranteed by the op emitting this.
"""

def __init__(self, layout, input, index):
4 changes: 2 additions & 2 deletions torch/_inductor/lowering.py
@@ -1553,7 +1553,7 @@ def make_fallback(kernel, layout_constraint=None, warn=True):
if torch._dynamo.config.suppress_errors:
torch._dynamo.config.suppress_errors = False
log.warning(
- "A make_fallback error occured in suppress_errors config,"
+ "A make_fallback error occurred in suppress_errors config,"
" and suppress_errors is being disabled to surface it."
)
raise AssertionError(
@@ -1584,7 +1584,7 @@ def philox_rand_offset(shape):
@register_lowering(torch.ops.rngprims.philox_rand, type_promotion_kind=None)
def philox_rand(size, seed, offset, stride, device, dtype):
# stride arg is optional and will be used in future for distributed random
- # ops. Currently, its ununsed.
+ # ops. Currently, its unused.
random_pos = ir.FixedLayout(
device,
dtype,
4 changes: 2 additions & 2 deletions torch/_inductor/pattern_matcher.py
@@ -531,7 +531,7 @@ def _match(self, node: List[torch.fx.Node], ctx: MatchContext):
if not isinstance(node, (list, tuple)) or len(node) == 0:
return FailedMatch("non_list")
m = Match(self)
- # Propogating patterns with multiple users will ensure we don't revisit
+ # Propagating patterns with multiple users will ensure we don't revisit
# the same nodes
pattern_to_node = ctx.filter_multi_user_patterns()
matched = False
@@ -851,7 +851,7 @@ def register_replacement(
"""
Create a replacement rule based on example functions that get traced
to create patterns. This supports both training and inference when
- run on a joint foward+backward graph.
+ run on a joint forward+backward graph.
Args:
search_fn: traced to give original pattern
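The register_replacement docstring above says patterns come from example functions that get traced. A hedged illustration of what such a search/replace pair might look like; the registration call itself and its remaining arguments are not shown in this hunk, so they are left out here:

import torch

def search_fn(x: torch.Tensor, w: torch.Tensor, b: torch.Tensor) -> torch.Tensor:
    # Pattern to look for: a matmul followed by a separate bias add.
    return torch.mm(x, w) + b

def replace_fn(x: torch.Tensor, w: torch.Tensor, b: torch.Tensor) -> torch.Tensor:
    # Equivalent replacement expressed as a single fused op.
    return torch.addmm(b, x, w)

# Sanity check that the two are numerically interchangeable.
x, w, b = torch.randn(4, 3), torch.randn(3, 5), torch.randn(5)
assert torch.allclose(search_fn(x, w, b), replace_fn(x, w, b), atol=1e-5)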
2 changes: 1 addition & 1 deletion torch/_inductor/scheduler.py
@@ -1509,7 +1509,7 @@ def can_fusion_increase_peak_memory(
The current attempt is a quick, possibly hacky, heuristic to prevent the
fusion of nodes that are far away in the original order.
- A better but difficult to implement heursitic would be to use live
+ A better but difficult to implement heurisitic would be to use live
intervals of the buffers, find region of peak pressure in the original
program and prevent fusion that crosses that peak region. We might need
special care or good approximation in this implementation, as fusion of
2 changes: 1 addition & 1 deletion torch/_inductor/sizevars.py
@@ -231,7 +231,7 @@ def prune(index):
# Note - [On Statically Known]
#
# The statically_known_* family of functions below replaces a prior system, called maybe_guard_*. The prior system
- # operated by providing esentially a question, where the size hinted values were evaluted. If the condition was
+ # operated by providing essentially a question, where the size hinted values were evaluated. If the condition was
# true, we add a guard and return True, otherwise, False.
#
# def maybe_guard_foo(args):
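The note in the sizevars.py hunk above contrasts the old maybe_guard_* helpers (answer using the hinted sizes and install a guard as a side effect) with the newer statically_known_* ones. A toy, self-contained sketch of that behavioural difference; the data structures here are stand-ins, not the real ShapeEnv:

guards = []
hints = {"s0": 8}                     # example value recorded for symbol s0

def maybe_guard_lt(sym, bound):
    # Old style: decide using the hinted value and, when it holds, bake the
    # assumption into the compiled artifact as a guard.
    if hints[sym] < bound:
        guards.append(f"{sym} < {bound}")
        return True
    return False

def statically_known_lt(sym, bound, known_upper_bounds):
    # New style: only answer True when it already follows from known facts;
    # never add a guard as a side effect.
    return known_upper_bounds.get(sym, float("inf")) < bound

print(maybe_guard_lt("s0", 16), guards)            # True ['s0 < 16']
print(statically_known_lt("s0", 16, {"s0": 32}))   # False, and no new guard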
2 changes: 1 addition & 1 deletion torch/_inductor/triton_heuristics.py
@@ -1064,7 +1064,7 @@ def reduction(size_hints, reduction_hint=False, meta=None, filename=None):
triton_config_reduction(size_hints, 64, 64),
triton_config_reduction(size_hints, 8, 512),
# halve the XBLOCK/RBLOCK compared to outer_config
- # TODO: this may only be beneficial when each iteration of the reduciton
+ # TODO: this may only be beneficial when each iteration of the reduction
# is quite heavy. E.g. https://gist.github.com/shunting314/189a8ef69f90db9d614a823385147a72
triton_config_reduction(size_hints, 64, 4, num_warps=8),
],
2 changes: 1 addition & 1 deletion torch/_inductor/utils.py
@@ -840,7 +840,7 @@ def run_and_get_triton_code(fn, *args, **kwargs):
@contextlib.contextmanager
def override_lowering(aten_op, override_fn):
"""
- Override the lowering of aten_op with overide_fn.
+ Override the lowering of aten_op with override_fn.
The first argument of override_fn is the original lowering fn.
"""
from torch._inductor import lowering
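The override_lowering docstring above says the override receives the original lowering as its first argument. A hedged usage sketch in which the wrapped lowering just logs and delegates (treat the details as illustrative, not a documented recipe):

import torch
from torch._inductor.utils import override_lowering

def logged_add(orig_lowering, *args, **kwargs):
    # First positional argument is the original lowering fn, per the docstring.
    print("lowering aten.add.Tensor")
    return orig_lowering(*args, **kwargs)

with override_lowering(torch.ops.aten.add.Tensor, logged_add):
    compiled = torch.compile(lambda x: x + 1)
    compiled(torch.randn(4))   # compilation (and the log line) happens here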
2 changes: 1 addition & 1 deletion torch/_inductor/wrapper_benchmark.py
@@ -30,7 +30,7 @@ def get_kernel_category_by_source_code(src_code):
def get_kernel_category(kernel_mod):
"""
Given the module defining a triton kernel, return the category of the kernel.
- Cateogry can be one of:
+ Category can be one of:
- pointwise
- reduction
- persistent_reduction
