refactor: improve docstring parsing logic for Google style (#28730)

Thank you for contributing to LangChain!

- [x] **PR title**: "package: description"
  - Where "package" is whichever of langchain, community, core, etc. is being modified. Use "docs: ..." for purely docs changes, "infra: ..." for CI changes.
  - Example: "community: add foobar LLM"

Description: Improved the `_parse_google_docstring` function in `langchain_core` (`libs/core`) to support parsing multi-paragraph descriptions before the `Args:` section while maintaining compliance with Google-style docstring guidelines. This change ensures better handling of docstrings with detailed function descriptions.

Issue: Fixes #28628

Dependencies: None.

Twitter handle: @isatyamks

---------

Co-authored-by: Erick Friis <[email protected]>
Co-authored-by: Chester Curme <[email protected]>
langchain-ai · Dec 18, 2024 · 90f7713 · 90f7713
1 parent 85c3bc1
commit 90f7713
Show file tree

Hide file tree

Showing 2 changed files with 83 additions and 1 deletion.
diff --git a/libs/core/langchain_core/utils/function_calling.py b/libs/core/langchain_core/utils/function_calling.py
@@ -615,7 +615,8 @@ def _parse_google_docstring(
                 arg for arg in args if arg not in ("run_manager", "callbacks", "return")
             }
             if filtered_annotations and (
-                len(docstring_blocks) < 2 or not docstring_blocks[1].startswith("Args:")
+                len(docstring_blocks) < 2
+                or not any(block.startswith("Args:") for block in docstring_blocks[1:])
             ):
                 msg = "Found invalid Google-Style docstring."
                 raise ValueError(msg)

diff --git a/libs/core/tests/unit_tests/test_tools.py b/libs/core/tests/unit_tests/test_tools.py
@@ -1190,6 +1190,87 @@ def foo5(run_manager: Optional[CallbackManagerForToolRun] = None) -> str:
     assert args_schema["description"] == expected["description"]
 
 
+def test_docstring_parsing() -> None:
+    expected = {
+        "title": "foo",
+        "description": "The foo.",
+        "type": "object",
+        "properties": {
+            "bar": {"title": "Bar", "description": "The bar.", "type": "string"},
+            "baz": {"title": "Baz", "description": "The baz.", "type": "integer"},
+        },
+        "required": ["bar", "baz"],
+    }
+
+    # Simple case
+    def foo(bar: str, baz: int) -> str:
+        """The foo.
+
+        Args:
+            bar: The bar.
+            baz: The baz.
+        """
+        return bar
+
+    as_tool = tool(foo, parse_docstring=True)
+    args_schema = _schema(as_tool.args_schema)  # type: ignore
+    assert args_schema["description"] == "The foo."
+    assert args_schema["properties"] == expected["properties"]
+
+    # Multi-line description
+    def foo2(bar: str, baz: int) -> str:
+        """The foo.
+
+        Additional description here.
+
+        Args:
+            bar: The bar.
+            baz: The baz.
+        """
+        return bar
+
+    as_tool = tool(foo2, parse_docstring=True)
+    args_schema2 = _schema(as_tool.args_schema)  # type: ignore
+    assert args_schema2["description"] == "The foo. Additional description here."
+    assert args_schema2["properties"] == expected["properties"]
+
+    # Multi-line with Returns block
+    def foo3(bar: str, baz: int) -> str:
+        """The foo.
+
+        Additional description here.
+
+        Args:
+            bar: The bar.
+            baz: The baz.
+
+        Returns:
+            str: description of returned value.
+        """
+        return bar
+
+    as_tool = tool(foo3, parse_docstring=True)
+    args_schema3 = _schema(as_tool.args_schema)  # type: ignore
+    args_schema3["title"] = "foo2"
+    assert args_schema2 == args_schema3
+
+    # Single argument
+    def foo4(bar: str) -> str:
+        """The foo.
+
+        Args:
+            bar: The bar.
+        """
+        return bar
+
+    as_tool = tool(foo4, parse_docstring=True)
+    args_schema4 = _schema(as_tool.args_schema)  # type: ignore
+    assert args_schema4["description"] == "The foo."
+    assert args_schema4["properties"] == {
+        "bar": {"description": "The bar.", "title": "Bar", "type": "string"}
+    }
+
+
 def test_tool_invalid_docstrings() -> None:
     # Test invalid docstrings
     def foo3(bar: str, baz: int) -> str: