fix(docs): Update HITL and add interrupt docs (#738)

langchain-ai · Dec 13, 2024 · d19858a · d19858a
1 parent 57ae1e3
commit d19858a
Show file tree

Hide file tree

Showing 26 changed files with 2,601 additions and 709 deletions.
diff --git a/.gitignore b/.gitignore
@@ -20,3 +20,4 @@ coverage/
 dist-cjs
 **/dist-cjs
 tmp/
+__pycache__
diff --git a/.vscode/settings.json b/.vscode/settings.json
@@ -5,5 +5,6 @@
     "Checkpointers",
     "Pregel"
   ],
-  "typescript.tsdk": "node_modules/typescript/lib"
+  "typescript.tsdk": "node_modules/typescript/lib",
+  "python.languageServer": "None"
 }
diff --git a/docs/_scripts/notebook_convert.py b/docs/_scripts/notebook_convert.py
@@ -0,0 +1,123 @@
+import os
+import re
+from pathlib import Path
+
+import nbformat
+from nbconvert.exporters import MarkdownExporter
+from nbconvert.preprocessors import Preprocessor
+
+
+class EscapePreprocessor(Preprocessor):
+    def preprocess_cell(self, cell, resources, cell_index):
+        if cell.cell_type == "markdown":
+            # rewrite markdown links to html links (excluding image links)
+            cell.source = re.sub(
+                r"(?<!!)\[([^\]]*)\]\((?![^\)]*//)([^)]*)(?:\.ipynb)?\)",
+                r'<a href="\2">\1</a>',
+                cell.source,
+            )
+            # Fix image paths in <img> tags
+            cell.source = re.sub(
+                r'<img\s+src="\.?/img/([^"]+)"', r'<img src="../img/\1"', cell.source
+            )
+
+        elif cell.cell_type == "code":
+            # escape ``` in code
+            cell.source = cell.source.replace("```", r"\`\`\`")
+            # escape ``` in output
+            if "outputs" in cell:
+                filter_out = set()
+                for i, output in enumerate(cell["outputs"]):
+                    if "text" in output:
+                        if not output["text"].strip():
+                            filter_out.add(i)
+                            continue
+
+                        value = output["text"].replace("```", r"\`\`\`")
+                        # handle a funky case w/ references in text
+                        value = re.sub(r"\[(\d+)\](?=\[(\d+)\])", r"[\1]\\", value)
+                        output["text"] = value
+                    elif "data" in output:
+                        for key, value in output["data"].items():
+                            if isinstance(value, str):
+                                value = value.replace("```", r"\`\`\`")
+                                # handle a funky case w/ references in text
+                                output["data"][key] = re.sub(
+                                    r"\[(\d+)\](?=\[(\d+)\])", r"[\1]\\", value
+                                )
+                cell["outputs"] = [
+                    output
+                    for i, output in enumerate(cell["outputs"])
+                    if i not in filter_out
+                ]
+
+        return cell, resources
+
+
+class ExtractAttachmentsPreprocessor(Preprocessor):
+    """
+    Extracts all of the outputs from the notebook file.  The extracted
+    outputs are returned in the 'resources' dictionary.
+    """
+
+    def preprocess_cell(self, cell, resources, cell_index):
+        """
+        Apply a transformation on each cell,
+        Parameters
+        ----------
+        cell : NotebookNode cell
+            Notebook cell being processed
+        resources : dictionary
+            Additional resources used in the conversion process.  Allows
+            preprocessors to pass variables into the Jinja engine.
+        cell_index : int
+            Index of the cell being processed (see base.py)
+        """
+
+        # Get files directory if it has been specified
+
+        # Make sure outputs key exists
+        if not isinstance(resources["outputs"], dict):
+            resources["outputs"] = {}
+
+        # Loop through all of the attachments in the cell
+        for name, attach in cell.get("attachments", {}).items():
+            for mime, data in attach.items():
+                if mime not in {
+                    "image/png",
+                    "image/jpeg",
+                    "image/svg+xml",
+                    "application/pdf",
+                }:
+                    continue
+
+                # attachments are pre-rendered. Only replace markdown-formatted
+                # images with the following logic
+                attach_str = f"({name})"
+                if attach_str in cell.source:
+                    data = f"(data:{mime};base64,{data})"
+                    cell.source = cell.source.replace(attach_str, data)
+
+        return cell, resources
+
+
+# Shared markdown exporter used by convert_notebook.  It registers the two
+# preprocessors defined above and renders with the custom "mdoutput"
+# template, looked up in the notebook_convert_templates directory that sits
+# next to this script.
+exporter = MarkdownExporter(
+    preprocessors=[
+        EscapePreprocessor,
+        ExtractAttachmentsPreprocessor,
+    ],
+    template_name="mdoutput",
+    extra_template_basedirs=[
+        os.path.join(os.path.dirname(__file__), "notebook_convert_templates")
+    ],
+)
+
+
+def convert_notebook(
+    notebook_path: Path,
+) -> Path:
+    with open(notebook_path) as f:
+        nb = nbformat.read(f, as_version=4)
+
+    body, _ = exporter.from_notebook_node(nb)
+    return body
diff --git a/docs/_scripts/notebook_convert_templates/mdoutput/conf.json b/docs/_scripts/notebook_convert_templates/mdoutput/conf.json
@@ -0,0 +1,5 @@
+{
+  "mimetypes": {
+    "text/markdown": true
+  }
+}
diff --git a/docs/_scripts/notebook_convert_templates/mdoutput/index.md.j2 b/docs/_scripts/notebook_convert_templates/mdoutput/index.md.j2
@@ -0,0 +1,33 @@
+{% extends 'markdown/index.md.j2' %}
+
+{%- block traceback_line -%}
+```output
+{{ line.rstrip() | strip_ansi }}
+```
+{%- endblock traceback_line -%}
+
+{%- block stream -%}
+```output
+{{ output.text.rstrip() }}
+```
+{%- endblock stream -%}
+
+{%- block data_text scoped -%}
+```output
+{{ output.data['text/plain'].rstrip() }}
+```
+{%- endblock data_text -%}
+
+{%- block data_html scoped -%}
+```html
+{{ output.data['text/html'] | safe }} 
+```
+{%- endblock data_html -%}
+
+{# JPEG outputs are inlined as a data URI using the registered image/jpeg media type. #}
+{%- block data_jpg scoped -%}
+![](data:image/jpeg;base64,{{ output.data['image/jpeg'] }})
+{%- endblock data_jpg -%}
+
+{%- block data_png scoped -%}
+![](data:image/png;base64,{{ output.data['image/png'] }})
+{%- endblock data_png -%}
diff --git a/docs/_scripts/notebook_hooks.py b/docs/_scripts/notebook_hooks.py
@@ -0,0 +1,40 @@
+import logging
+from typing import Any, Dict
+
+from mkdocs.structure.pages import Page
+from mkdocs.structure.files import Files, File
+from notebook_convert import convert_notebook
+
+# Module-level logger for these hooks.  Its level is set to INFO so the
+# "Processing Jupyter notebook" message logged in on_page_markdown is not
+# filtered out by this logger.
+logger = logging.getLogger(__name__)
+logging.basicConfig()
+logger.setLevel(logging.INFO)
+
+
+class NotebookFile(File):
+    """A ``File`` that always reports itself as a documentation page.
+
+    ``on_files`` wraps every ``.ipynb`` source in this class, presumably so
+    that mkdocs builds notebooks as pages (confirm against the mkdocs
+    ``File`` API).
+    """
+
+    def is_documentation_page(self):
+        """Return True unconditionally."""
+        return True
+
+
+def on_files(files: Files, **kwargs: Dict[str, Any]):
+    new_files = Files([])
+    for file in files:
+        if file.src_path.endswith(".ipynb"):
+            new_file = NotebookFile(
+                path=file.src_path,
+                src_dir=file.src_dir,
+                dest_dir=file.dest_dir,
+                use_directory_urls=file.use_directory_urls,
+            )
+            new_files.append(new_file)
+        else:
+            new_files.append(file)
+    return new_files
+
+
+def on_page_markdown(markdown: str, page: Page, **kwargs: Dict[str, Any]):
+    if page.file.src_path.endswith(".ipynb"):
+        logger.info("Processing Jupyter notebook: %s", page.file.src_path)
+        body = convert_notebook(page.file.abs_src_path)
+        return body
+
+    return markdown
diff --git a/docs/docs/concepts/breakpoints.md b/docs/docs/concepts/breakpoints.md
@@ -0,0 +1,146 @@
+# Breakpoints
+
+Breakpoints pause graph execution at specific points and enable stepping through execution step by step. Breakpoints are powered by LangGraph's [**persistence layer**](./persistence.md), which saves the state after each graph step. Breakpoints can also be used to enable [**human-in-the-loop**](./human_in_the_loop.md) workflows, though we recommend using the [`interrupt` function](./human_in_the_loop.md#interrupt) for this purpose.
+
+## Requirements
+
+To use breakpoints, you will need to:
+
+1. [**Specify a checkpointer**](persistence.md#checkpoints) to save the graph state after each step.
+
+2. [**Set breakpoints**](#setting-breakpoints) to specify where execution should pause.
+
+3. **Run the graph** with a [**thread ID**](./persistence.md#threads) to pause execution at the breakpoint.
+
+4. **Resume execution** using `invoke`/`stream` (see [**The `Command` primitive**](./human_in_the_loop.md#the-command-primitive)).
+
+## Setting breakpoints
+
+There are two places where you can set breakpoints:
+
+1. **Before** or **after** a node executes by setting breakpoints at **compile time** or **run time**. We call these [**static breakpoints**](#static-breakpoints).
+
+2. **Inside** a node using the [`NodeInterrupt` error](#nodeinterrupt-error).
+
+### Static breakpoints
+
+Static breakpoints are triggered either **before** or **after** a node executes. You can set static breakpoints by specifying `interruptBefore` and `interruptAfter` at **compile time** or **run time**.
+
+=== "Compile time"
+
+    ```typescript
+    const graph = graphBuilder.compile({
+        interruptBefore: ["nodeA"],
+        interruptAfter: ["nodeB", "nodeC"],
+        checkpointer: ..., // Specify a checkpointer
+    });
+
+    const threadConfig = {
+        configurable: {
+            thread_id: "someThread"
+        }
+    };
+
+    // Run the graph until the breakpoint
+    await graph.invoke(inputs, threadConfig);
+
+    // Optionally update the graph state based on user input
+    await graph.updateState(threadConfig, update);
+
+    // Resume the graph
+    await graph.invoke(null, threadConfig);
+    ```
+
+=== "Run time"
+
+    ```typescript
+    await graph.invoke(
+        inputs,
+        { 
+            configurable: { thread_id: "someThread" },
+            interruptBefore: ["nodeA"],
+            interruptAfter: ["nodeB", "nodeC"]
+        }
+    );
+
+    const threadConfig = {
+        configurable: {
+            thread_id: "someThread"
+        }
+    };
+
+    // Run the graph until the breakpoint
+    await graph.invoke(inputs, threadConfig);
+
+    // Optionally update the graph state based on user input
+    await graph.updateState(threadConfig, update);
+
+    // Resume the graph
+    await graph.invoke(null, threadConfig);
+    ```
+
+    !!! note
+
+        You cannot set static breakpoints at runtime for **sub-graphs**.
+
+        If you have a sub-graph, you must set the breakpoints at compilation time.
+
+Static breakpoints can be especially useful for debugging if you want to step through the graph execution one
+node at a time or if you want to pause the graph execution at specific nodes.
+
+### `NodeInterrupt` error
+
+We recommend that you [**use the `interrupt` function instead**](./human_in_the_loop.md#interrupt) of the `NodeInterrupt` error if you're trying to implement
+[human-in-the-loop](./human_in_the_loop.md) workflows. The `interrupt` function is easier to use and more flexible.
+
+??? note "`NodeInterrupt` error"
+
+    The developer can define some *condition* that must be met for a breakpoint to be triggered. This concept of [dynamic breakpoints](./low_level.md#dynamic-breakpoints) is useful when the developer wants to halt the graph under *a particular condition*. This uses a `NodeInterrupt`, which is a special type of error that can be thrown from within a node based upon some condition. As an example, we can define a dynamic breakpoint that triggers when the `input` is longer than 5 characters.
+
+    ```typescript
+    function myNode(state: typeof GraphAnnotation.State) {
+        if (state.input.length > 5) {
+            throw new NodeInterrupt(`Received input that is longer than 5 characters: ${state.input}`);
+        }
+        return state;
+    }
+    ```
+
+    Let's assume we run the graph with an input that triggers the dynamic breakpoint and then attempt to resume the graph execution simply by passing in `null` for the input.
+
+    ```typescript
+    // Attempt to continue the graph execution with no change to state after we hit the dynamic breakpoint 
+    for await (const event of await graph.stream(null, threadConfig)) {
+        console.log(event);
+    }
+    ```
+
+    The graph will *interrupt* again because this node will be *re-run* with the same graph state. We need to change the graph state such that the condition that triggers the dynamic breakpoint is no longer met. So, we can simply edit the graph state to an input that no longer triggers our dynamic breakpoint (5 characters or fewer) and re-run the node.
+
+    ```typescript
+    // Update the state to pass the dynamic breakpoint
+    await graph.updateState(threadConfig, { input: "foo" });
+
+    for await (const event of await graph.stream(null, threadConfig)) {
+        console.log(event);
+    }
+    ```
+
+    Alternatively, what if we want to keep our current input and skip the node (`myNode`) that performs the check? To do this, we can simply perform the graph update with `"myNode"` (the node name) as the third positional argument, and pass in `null` for the values. This will make no update to the graph state, but run the update as `myNode`, effectively skipping the node and bypassing the dynamic breakpoint.
+
+    ```typescript
+    // This update will skip the node `myNode` altogether
+    await graph.updateState(threadConfig, null, "myNode");
+
+    for await (const event of await graph.stream(null, threadConfig)) {
+        console.log(event);
+    }
+    ```
+
+## Additional Resources 📚
+
+- [**Conceptual Guide: Persistence**](persistence.md): Read the persistence guide for more context about persistence.
+
+- [**Conceptual Guide: Human-in-the-loop**](human_in_the_loop.md): Read the human-in-the-loop guide for more context on integrating human feedback into LangGraph applications using breakpoints.
+
+- [**How to View and Update Past Graph State**](/langgraphjs/how-tos/time-travel): Step-by-step instructions for working with graph state that demonstrate the **replay** and **fork** actions.