From 38ae17d166a001ef6837553d1ddffa111624df27 Mon Sep 17 00:00:00 2001
From: William Wen
Date: Wed, 29 Nov 2023 18:02:45 -0800
Subject: [PATCH] [dynamo, docs] update dynamo backend registration docs
 (#114820)

Update docs to reflect current backend registration API. Add
`lookup_backend` to root `dynamo` module.

Pull Request resolved: https://github.com/pytorch/pytorch/pull/114820
Approved by: https://github.com/eellison
---
 .../source/torch.compiler_custom_backends.rst | 184 +++++++++---------
 torch/_dynamo/__init__.py                     |   3 +-
 2 files changed, 94 insertions(+), 93 deletions(-)

diff --git a/docs/source/torch.compiler_custom_backends.rst b/docs/source/torch.compiler_custom_backends.rst
index db980d7b3e833..47b749fcf83ba 100644
--- a/docs/source/torch.compiler_custom_backends.rst
+++ b/docs/source/torch.compiler_custom_backends.rst
@@ -45,7 +45,7 @@ You can register your backend using the ``register_backend`` decorator, for exam
 
 .. code-block:: python
 
-    from torch._dynamo.optimizations import register_backend
+    from torch._dynamo import register_backend
 
     @register_backend
     def my_compiler(gm, example_inputs):
@@ -112,7 +112,7 @@ For example,
 
 .. code-block:: python
 
-    from torch._dynamo.optimizations.training import aot_autograd
+    from torch._dynamo.backends.common import aot_autograd
     from functorch.compile import make_boxed_func
 
     def my_compiler(gm, example_inputs):
@@ -138,107 +138,107 @@ For example:
 
 .. code-block:: python
 
-   from typing import List
-   import torch
-   def my_compiler(gm: torch.fx.GraphModule, example_inputs: List[torch.Tensor]):
-       print("my_compiler() called with FX graph:")
-       gm.graph.print_tabular()
-       return gm.forward  # return a python callable
-   @torch.compile(backend=my_compiler)
-   def fn(x, y):
-       a = torch.cos(x)
-       b = torch.sin(y)
-       return a + b
-   fn(torch.randn(10), torch.randn(10))
+    from typing import List
+    import torch
+    def my_compiler(gm: torch.fx.GraphModule, example_inputs: List[torch.Tensor]):
+        print("my_compiler() called with FX graph:")
+        gm.graph.print_tabular()
+        return gm.forward  # return a python callable
+    @torch.compile(backend=my_compiler)
+    def fn(x, y):
+        a = torch.cos(x)
+        b = torch.sin(y)
+        return a + b
+    fn(torch.randn(10), torch.randn(10))
 
 Running the above example produces the following output:
 
 ::
 
-   my_compiler() called with FX graph:
-   opcode         name    target                                                  args        kwargs
-   -------------  ------  ------------------------------------------------------  ----------  --------
-   placeholder    x       x                                                       ()          {}
-   placeholder    y       y                                                       ()          {}
-   call_function  cos                                                             (x,)        {}
-   call_function  sin                                                             (y,)        {}
-   call_function  add                                                             (cos, sin)  {}
-   output         output  output                                                  ((add,),)   {}
+    my_compiler() called with FX graph:
+    opcode         name    target                                                  args        kwargs
+    -------------  ------  ------------------------------------------------------  ----------  --------
+    placeholder    x       x                                                       ()          {}
+    placeholder    y       y                                                       ()          {}
+    call_function  cos                                                             (x,)        {}
+    call_function  sin                                                             (y,)        {}
+    call_function  add                                                             (cos, sin)  {}
+    output         output  output                                                  ((add,),)   {}
 
 This works for ``torch.nn.Module`` as well as shown below:
 
 .. code-block:: python
 
-   from typing import List
-   import torch
-   def my_compiler(gm: torch.fx.GraphModule, example_inputs: List[torch.Tensor]):
-       print("my_compiler() called with FX graph:")
-       gm.graph.print_tabular()
-       return gm.forward  # return a python callable
-   class MockModule(torch.nn.Module):
-       def __init__(self):
-           super().__init__()
-           self.relu = torch.nn.ReLU()
-       def forward(self, x):
-           return self.relu(torch.cos(x))
-   mod = MockModule()
-   optimized_mod = torch.compile(mod, backend=my_compiler)
-   optimized_mod(torch.randn(10))
+    from typing import List
+    import torch
+    def my_compiler(gm: torch.fx.GraphModule, example_inputs: List[torch.Tensor]):
+        print("my_compiler() called with FX graph:")
+        gm.graph.print_tabular()
+        return gm.forward  # return a python callable
+    class MockModule(torch.nn.Module):
+        def __init__(self):
+            super().__init__()
+            self.relu = torch.nn.ReLU()
+        def forward(self, x):
+            return self.relu(torch.cos(x))
+    mod = MockModule()
+    optimized_mod = torch.compile(mod, backend=my_compiler)
+    optimized_mod(torch.randn(10))
 
 Let’s take a look at one more example with control flow:
 
 .. code-block:: python
 
-   from typing import List
-   import torch
-   def my_compiler(gm: torch.fx.GraphModule, example_inputs: List[torch.Tensor]):
-       print("my_compiler() called with FX graph:")
-       gm.graph.print_tabular()
-       return gm.forward  # return a python callable
-   @torch.compile(backend=my_compiler)
-   def toy_example(a, b):
-       x = a / (torch.abs(a) + 1)
-       if b.sum() < 0:
-           b = b * -1
-       return x * b
-   for _ in range(100):
-       toy_example(torch.randn(10), torch.randn(10))
+    from typing import List
+    import torch
+    def my_compiler(gm: torch.fx.GraphModule, example_inputs: List[torch.Tensor]):
+        print("my_compiler() called with FX graph:")
+        gm.graph.print_tabular()
+        return gm.forward  # return a python callable
+    @torch.compile(backend=my_compiler)
+    def toy_example(a, b):
+        x = a / (torch.abs(a) + 1)
+        if b.sum() < 0:
+            b = b * -1
+        return x * b
+    for _ in range(100):
+        toy_example(torch.randn(10), torch.randn(10))
 
 Running this example produces the following output:
 
 ::
 
-   my_compiler() called with FX graph:
-   opcode         name     target                                                  args              kwargs
-   -------------  -------  ------------------------------------------------------  ----------------  --------
-   placeholder    a        a                                                       ()                {}
-   placeholder    b        b                                                       ()                {}
-   call_function  abs_1                                                            (a,)              {}
-   call_function  add                                                              (abs_1, 1)        {}
-   call_function  truediv                                                          (a, add)          {}
-   call_method    sum_1    sum                                                     (b,)              {}
-   call_function  lt                                                               (sum_1, 0)        {}
-   output         output   output                                                  ((truediv, lt),)  {}
-
-   my_compiler() called with FX graph:
-   opcode         name    target                   args         kwargs
-   -------------  ------  -----------------------  -----------  --------
-   placeholder    b       b                        ()           {}
-   placeholder    x       x                        ()           {}
-   call_function  mul                              (b, -1)      {}
-   call_function  mul_1                            (x, mul)     {}
-   output         output  output                   ((mul_1,),)  {}
-
-   my_compiler() called with FX graph:
-   opcode         name    target                   args       kwargs
-   -------------  ------  -----------------------  ---------  --------
-   placeholder    b       b                        ()         {}
-   placeholder    x       x                        ()         {}
-   call_function  mul                              (x, b)     {}
-   output         output  output                   ((mul,),)  {}
-
-The order of the last two graphs is nondeterministic depending
-on which one is encountered first by the just-in-time compiler.
+    my_compiler() called with FX graph:
+    opcode         name     target                                                  args              kwargs
+    -------------  -------  ------------------------------------------------------  ----------------  --------
+    placeholder    a        a                                                       ()                {}
+    placeholder    b        b                                                       ()                {}
+    call_function  abs_1                                                            (a,)              {}
+    call_function  add                                                              (abs_1, 1)        {}
+    call_function  truediv                                                          (a, add)          {}
+    call_method    sum_1    sum                                                     (b,)              {}
+    call_function  lt                                                               (sum_1, 0)        {}
+    output         output   output                                                  ((truediv, lt),)  {}
+
+    my_compiler() called with FX graph:
+    opcode         name    target                   args         kwargs
+    -------------  ------  -----------------------  -----------  --------
+    placeholder    b       b                        ()           {}
+    placeholder    x       x                        ()           {}
+    call_function  mul                              (b, -1)      {}
+    call_function  mul_1                            (x, mul)     {}
+    output         output  output                   ((mul_1,),)  {}
+
+    my_compiler() called with FX graph:
+    opcode         name    target                   args       kwargs
+    -------------  ------  -----------------------  ---------  --------
+    placeholder    b       b                        ()         {}
+    placeholder    x       x                        ()         {}
+    call_function  mul                              (x, b)     {}
+    output         output  output                   ((mul,),)  {}
+
+    The order of the last two graphs is nondeterministic depending
+    on which one is encountered first by the just-in-time compiler.
 
 Speedy Backend
 ^^^^^^^^^^^^^^
@@ -249,17 +249,17 @@ with `optimize_for_inference
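
For reference, the sketch below strings together the pieces this patch touches under their updated locations: ``register_backend`` imported from the ``torch._dynamo`` root, ``aot_autograd`` from ``torch._dynamo.backends.common``, and ``lookup_backend`` newly re-exported from ``torch._dynamo``. It is an illustrative sketch rather than part of the patch; the backend name ``my_backend`` is made up, and exact signatures may differ between PyTorch versions.

.. code-block:: python

    # Illustrative sketch only -- not part of this patch. Assumes a PyTorch
    # build where torch._dynamo re-exports register_backend and lookup_backend.
    from typing import List

    import torch
    from functorch.compile import make_boxed_func
    from torch._dynamo import lookup_backend, register_backend
    from torch._dynamo.backends.common import aot_autograd

    @register_backend
    def my_backend(gm: torch.fx.GraphModule, example_inputs: List[torch.Tensor]):
        # Print the captured FX graph, then run it unchanged (eager fallback).
        gm.graph.print_tabular()
        return gm.forward

    # Resolve the registered name back to the callable; this should be
    # equivalent to passing backend="my_backend" directly.
    @torch.compile(backend=lookup_backend("my_backend"))
    def fn(x, y):
        return torch.cos(x) + torch.sin(y)

    fn(torch.randn(10), torch.randn(10))

    # Wrap a forward compiler with AOTAutograd so backward graphs are also
    # captured; make_boxed_func adapts gm.forward to the boxed convention.
    my_backend_aot = aot_autograd(
        fw_compiler=lambda gm, example_inputs: make_boxed_func(gm.forward)
    )
    mod = torch.nn.Linear(10, 10)
    opt_mod = torch.compile(mod, backend=my_backend_aot)
    opt_mod(torch.randn(2, 10))

The first ``fn`` call triggers ``my_backend`` and prints the captured FX graph in the same tabular form shown in the hunks above.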