Merge branch 'master' into dev2
deedy5 authored Nov 23, 2023
2 parents 96229cd + 751226e commit 86959a1
Showing 146 changed files with 1,024 additions and 204 deletions.
1 change: 1 addition & 0 deletions .github/workflows/langchain_ci.yml
Original file line number Diff line number Diff line change
@@ -14,6 +14,7 @@ on:
- '.github/workflows/langchain_ci.yml'
- 'libs/*'
- 'libs/langchain/**'
- 'libs/core/**'
workflow_dispatch: # Allows to trigger the workflow manually in GitHub UI

# If another push to the same PR or branch happens while this workflow is still running,
2 changes: 2 additions & 0 deletions .github/workflows/langchain_experimental_ci.yml
@@ -13,6 +13,8 @@ on:
- '.github/workflows/langchain_experimental_ci.yml'
- 'libs/*'
- 'libs/experimental/**'
- 'libs/langchain/**'
- 'libs/core/**'
workflow_dispatch: # Allows to trigger the workflow manually in GitHub UI

# If another push to the same PR or branch happens while this workflow is still running,
38 changes: 32 additions & 6 deletions cookbook/docugami_xml_kg_rag.ipynb
@@ -52,6 +52,7 @@
},
{
"cell_type": "markdown",
"id": "c6fb4903-f845-4907-ae14-df305891b0ff",
"metadata": {},
"source": [
"## Data Loading\n",
@@ -76,17 +77,18 @@
{
"cell_type": "code",
"execution_count": 45,
"id": "fc0767d4-9155-4591-855c-ef2e14e0e10f",
"metadata": {},
"outputs": [],
"source": [
"import os\n",
"import tempfile\n",
"from pathlib import Path\n",
"from pprint import pprint\n",
"import requests\n",
"import tempfile\n",
"from time import sleep\n",
"from typing import Dict, List\n",
"\n",
"import requests\n",
"from docugami import Docugami\n",
"from docugami.types import Document as DocugamiDocument\n",
"\n",
@@ -166,6 +168,7 @@
{
"cell_type": "code",
"execution_count": 46,
"id": "ce0b2b21-7623-46e7-ae2c-3a9f67e8b9b9",
"metadata": {},
"outputs": [
{
@@ -207,6 +210,7 @@
},
{
"cell_type": "markdown",
"id": "01f035e5-c3f8-4d23-9d1b-8d2babdea8e9",
"metadata": {},
"source": [
"If you are on the free Docugami tier, your files should be done in ~15 minutes or less depending on the number of pages uploaded and available resources (please contact Docugami for paid plans for faster processing). You can re-run the code above without reprocessing your files to continue waiting if your notebook is not continuously running (it does not re-upload)."
@@ -225,6 +229,7 @@
{
"cell_type": "code",
"execution_count": 47,
"id": "05fcdd57-090f-44bf-a1fb-2c3609c80e34",
"metadata": {},
"outputs": [
{
@@ -268,6 +273,7 @@
},
{
"cell_type": "markdown",
"id": "bfc1f2c9-e6d4-4d98-a799-6bc30bc61661",
"metadata": {},
"source": [
"The file processed by Docugami in the example above was [this one](https://data.ntsb.gov/carol-repgen/api/Aviation/ReportMain/GenerateNewestReport/192541/pdf) from the NTSB and you can look at the PDF side by side to compare the XML chunks above. \n",
@@ -278,6 +284,7 @@
{
"cell_type": "code",
"execution_count": 48,
"id": "8a4b49e0-de78-4790-a930-ad7cf324697a",
"metadata": {},
"outputs": [
{
@@ -326,6 +333,7 @@
},
{
"cell_type": "markdown",
"id": "1cfc06bc-67d2-46dd-b04d-95efa3619d0a",
"metadata": {},
"source": [
"## Docugami XML Deep Dive: Jane Doe NDA Example\n",
@@ -336,6 +344,7 @@
{
"cell_type": "code",
"execution_count": 109,
"id": "7b697d30-1e94-47f0-87e8-f81d4b180da2",
"metadata": {},
"outputs": [
{
@@ -361,6 +370,7 @@
{
"cell_type": "code",
"execution_count": 98,
"id": "14714576-6e1d-499b-bcc8-39140bb2fd78",
"metadata": {},
"outputs": [
{
@@ -415,6 +425,7 @@
},
{
"cell_type": "markdown",
"id": "dc09ba64-4973-4471-9501-54294c1143fc",
"metadata": {},
"source": [
"The Docugami XML contains extremely detailed semantics and visual bounding boxes for all elements. The `dgml-utils` library parses text and non-text elements into formats appropriate to pass into LLMs (chunked text with XML semantic labels)"
@@ -423,6 +434,7 @@
{
"cell_type": "code",
"execution_count": 100,
"id": "2b4ece00-2e43-4254-adc9-66dbb79139a6",
"metadata": {},
"outputs": [
{
@@ -460,6 +472,7 @@
{
"cell_type": "code",
"execution_count": 101,
"id": "08350119-aa22-4ec1-8f65-b1316a0d4123",
"metadata": {},
"outputs": [
{
@@ -476,6 +489,7 @@
},
{
"cell_type": "markdown",
"id": "dca87b46-c0c2-4973-94ec-689c18075653",
"metadata": {},
"source": [
"The XML markup contains structural as well as semantic tags, which provide additional semantics to the LLM for improved retrieval and generation.\n",
@@ -486,6 +500,7 @@
{
"cell_type": "code",
"execution_count": 112,
"id": "bcac8294-c54a-4b6e-af9d-3911a69620b2",
"metadata": {},
"outputs": [
{
@@ -539,8 +554,8 @@
"from langchain.chat_models import ChatOpenAI\n",
"from langchain.prompts import (\n",
" ChatPromptTemplate,\n",
" SystemMessagePromptTemplate,\n",
" HumanMessagePromptTemplate,\n",
" SystemMessagePromptTemplate,\n",
")\n",
"from langchain.schema.output_parser import StrOutputParser"
]
@@ -610,11 +625,12 @@
"outputs": [],
"source": [
"import uuid\n",
"from langchain.vectorstores.chroma import Chroma\n",
"from langchain.storage import InMemoryStore\n",
"from langchain.schema.document import Document\n",
"\n",
"from langchain.embeddings import OpenAIEmbeddings\n",
"from langchain.retrievers.multi_vector import MultiVectorRetriever\n",
"from langchain.schema.document import Document\n",
"from langchain.storage import InMemoryStore\n",
"from langchain.vectorstores.chroma import Chroma\n",
"\n",
"\n",
"def build_retriever(text_elements, tables, table_summaries):\n",
@@ -710,6 +726,7 @@
{
"cell_type": "code",
"execution_count": 120,
"id": "636e992f-823b-496b-a082-8b4fcd479de5",
"metadata": {},
"outputs": [
{
@@ -743,6 +760,7 @@
},
{
"cell_type": "markdown",
"id": "86cad5db-81fe-4ae6-a20e-550b85fcbe96",
"metadata": {},
"source": [
"# RAG on Llama2 paper\n",
@@ -753,6 +771,7 @@
{
"cell_type": "code",
"execution_count": 121,
"id": "0e4a2f43-dd48-4ae3-8e27-7e87d169965f",
"metadata": {},
"outputs": [
{
@@ -777,6 +796,7 @@
{
"cell_type": "code",
"execution_count": 124,
"id": "56b78fb3-603d-4343-ae72-be54a3c5dd72",
"metadata": {},
"outputs": [
{
@@ -801,6 +821,7 @@
{
"cell_type": "code",
"execution_count": 125,
"id": "d3cc5ba9-8553-4eda-a5d1-b799751186af",
"metadata": {},
"outputs": [],
"source": [
@@ -812,6 +833,7 @@
{
"cell_type": "code",
"execution_count": 126,
"id": "d7c73faf-74cb-400d-8059-b69e2493de38",
"metadata": {},
"outputs": [],
"source": [
@@ -823,6 +845,7 @@
{
"cell_type": "code",
"execution_count": 127,
"id": "4c553722-be42-42ce-83b8-76a17f323f1c",
"metadata": {},
"outputs": [],
"source": [
@@ -832,6 +855,7 @@
{
"cell_type": "code",
"execution_count": 128,
"id": "65dce40b-f1c3-494a-949e-69a9c9544ddb",
"metadata": {},
"outputs": [
{
@@ -851,6 +875,7 @@
},
{
"cell_type": "markdown",
"id": "59877edf-9a02-45db-95cb-b7f4234abfa3",
"metadata": {},
"source": [
"We can check the [trace](https://smith.langchain.com/public/5de100c3-bb40-4234-bf02-64bc708686a1/r) to see what chunks were retrieved.\n",
@@ -939,6 +964,7 @@
},
{
"cell_type": "markdown",
"id": "0879349e-7298-4f2c-b246-f1142e97a8e5",
"metadata": {},
"source": []
}
2 changes: 1 addition & 1 deletion cookbook/llm_bash.ipynb
@@ -69,8 +69,8 @@
"metadata": {},
"outputs": [],
"source": [
"from langchain_experimental.llm_bash.prompt import BashOutputParser\n",
"from langchain.prompts.prompt import PromptTemplate\n",
"from langchain_experimental.llm_bash.prompt import BashOutputParser\n",
"\n",
"_PROMPT_TEMPLATE = \"\"\"If someone asks you to perform a task, your job is to come up with a series of bash commands that will perform the task. There is no need to put \"#!/bin/bash\" in your answer. Make sure to reason step by step, using this format:\n",
"Question: \"copy the files in the directory named 'target' into a new directory at the same level as target called 'myNewDirectory'\"\n",
2 changes: 1 addition & 1 deletion docs/api_reference/create_api_rst.py
@@ -296,7 +296,7 @@ def _document_langchain_experimental() -> None:
def _document_langchain_core() -> None:
"""Document the langchain_core package."""
# Generate core_api_reference.rst
core_members = _load_package_modules(EXP_DIR)
core_members = _load_package_modules(CORE_DIR)
core_doc = ".. _core_api_reference:\n\n" + _construct_doc(
"langchain_core", core_members
)
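The one-constant fix above is the substantive change in this file: the langchain_core API reference was being generated from the experimental package's directory. A hedged sketch of the corrected helper, with stand-in paths and a stubbed module loader (the real script's `_load_package_modules` and `_construct_doc` are more involved):

```python
from pathlib import Path

# Illustrative stand-ins; the real script derives these from the repo layout.
CORE_DIR = Path("libs/core/langchain_core")
EXP_DIR = Path("libs/experimental/langchain_experimental")


def document_langchain_core(load_package_modules) -> str:
    """Build the core_api_reference.rst header from the *core* package."""
    # Before this commit the call passed EXP_DIR, so the "core" reference
    # actually documented langchain_experimental's modules.
    core_members = load_package_modules(CORE_DIR)
    return ".. _core_api_reference:\n\n" + "\n".join(sorted(core_members))


# Stub loader standing in for the real package scanner.
print(document_langchain_core(lambda d: [f"{d.name}.callbacks", f"{d.name}.documents"]))
```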
1 change: 1 addition & 0 deletions docs/api_reference/requirements.txt
@@ -1,5 +1,6 @@
-e libs/langchain
-e libs/experimental
-e libs/core
pydantic<2
autodoc_pydantic==1.8.0
myst_parser
11 changes: 6 additions & 5 deletions libs/core/langchain_core/callbacks/base.py
@@ -1,15 +1,16 @@
"""Base callback handler that can be used to handle callbacks in langchain."""
from __future__ import annotations

from typing import Any, Dict, List, Optional, Sequence, TypeVar, Union
from typing import TYPE_CHECKING, Any, Dict, List, Optional, Sequence, TypeVar, Union
from uuid import UUID

from tenacity import RetryCallState

from langchain_core.agents import AgentAction, AgentFinish
from langchain_core.documents import Document
from langchain_core.messages import BaseMessage
from langchain_core.outputs import ChatGenerationChunk, GenerationChunk, LLMResult
if TYPE_CHECKING:
from langchain_core.agents import AgentAction, AgentFinish
from langchain_core.documents import Document
from langchain_core.messages import BaseMessage
from langchain_core.outputs import ChatGenerationChunk, GenerationChunk, LLMResult


class RetrieverManagerMixin:
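The diff above moves the `AgentAction`/`Document`/`BaseMessage`/`LLMResult` imports under `typing.TYPE_CHECKING`, the standard way to keep names available for type hints without paying for (or cycling through) the imports at runtime. A minimal self-contained sketch of the pattern, not the actual langchain_core code:

```python
from __future__ import annotations

from typing import TYPE_CHECKING, Sequence

if TYPE_CHECKING:
    # Seen only by static type checkers (mypy, pyright); skipped at runtime,
    # so importing this module never pulls in the heavier langchain_core
    # modules and cannot participate in an import cycle.
    from langchain_core.documents import Document


class RetrieverManagerMixin:
    """Sketch of a mixin whose hints name Document without a runtime import."""

    def on_retriever_end(self, documents: Sequence[Document], **kwargs: object) -> None:
        # `from __future__ import annotations` keeps the hint a plain string,
        # so the runtime-undefined name Document is never evaluated.
        print(f"retriever returned {len(documents)} documents")


RetrieverManagerMixin().on_retriever_end(["doc-a", "doc-b"])  # prints: retriever returned 2 documents
```

The same refactor is applied to `manager.py`, `stdout.py`, and `streaming_stdout.py` below; only `callbacks.base` and other intra-package modules that are needed at runtime stay as real imports.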
9 changes: 6 additions & 3 deletions libs/core/langchain_core/callbacks/manager.py
@@ -7,6 +7,7 @@
from concurrent.futures import ThreadPoolExecutor
from contextlib import asynccontextmanager, contextmanager
from typing import (
TYPE_CHECKING,
Any,
AsyncGenerator,
Coroutine,
@@ -25,7 +26,6 @@
from langsmith.run_helpers import get_run_tree_context
from tenacity import RetryCallState

from langchain_core.agents import AgentAction, AgentFinish
from langchain_core.callbacks.base import (
BaseCallbackHandler,
BaseCallbackManager,
@@ -37,11 +37,14 @@
ToolManagerMixin,
)
from langchain_core.callbacks.stdout import StdOutCallbackHandler
from langchain_core.documents import Document
from langchain_core.messages import BaseMessage, get_buffer_string
from langchain_core.outputs import ChatGenerationChunk, GenerationChunk, LLMResult
from langchain_core.utils.env import env_var_is_set

if TYPE_CHECKING:
from langchain_core.agents import AgentAction, AgentFinish
from langchain_core.documents import Document
from langchain_core.outputs import ChatGenerationChunk, GenerationChunk, LLMResult

logger = logging.getLogger(__name__)


10 changes: 7 additions & 3 deletions libs/core/langchain_core/callbacks/stdout.py
@@ -1,11 +1,15 @@
"""Callback Handler that prints to std out."""
from typing import Any, Dict, List, Optional
from __future__ import annotations

from typing import TYPE_CHECKING, Any, Dict, List, Optional

from langchain_core.agents import AgentAction, AgentFinish
from langchain_core.callbacks.base import BaseCallbackHandler
from langchain_core.outputs import LLMResult
from langchain_core.utils import print_text

if TYPE_CHECKING:
from langchain_core.agents import AgentAction, AgentFinish
from langchain_core.outputs import LLMResult


class StdOutCallbackHandler(BaseCallbackHandler):
"""Callback Handler that prints to std out."""
12 changes: 8 additions & 4 deletions libs/core/langchain_core/callbacks/streaming_stdout.py
@@ -1,11 +1,15 @@
"""Callback Handler streams to stdout on new llm token."""
from __future__ import annotations

import sys
from typing import Any, Dict, List
from typing import TYPE_CHECKING, Any, Dict, List

from langchain_core.agents import AgentAction, AgentFinish
from langchain_core.callbacks.base import BaseCallbackHandler
from langchain_core.messages import BaseMessage
from langchain_core.outputs import LLMResult

if TYPE_CHECKING:
from langchain_core.agents import AgentAction, AgentFinish
from langchain_core.messages import BaseMessage
from langchain_core.outputs import LLMResult


class StreamingStdOutCallbackHandler(BaseCallbackHandler):
4 changes: 4 additions & 0 deletions libs/core/langchain_core/documents/__init__.py
@@ -0,0 +1,4 @@
from langchain_core.documents.base import Document
from langchain_core.documents.transformers import BaseDocumentTransformer

__all__ = ["Document", "BaseDocumentTransformer"]
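This new `__init__.py` gives `Document` and `BaseDocumentTransformer` a stable import path (`langchain_core.documents`) and pins the public surface with `__all__`. A toy sketch of the two re-exported interfaces in use — the class bodies are stand-ins, not the real langchain_core definitions:

```python
from abc import ABC, abstractmethod
from typing import Optional, Sequence


class Document:
    """Stand-in mirroring the shape of the re-exported Document class."""

    def __init__(self, page_content: str, metadata: Optional[dict] = None) -> None:
        self.page_content = page_content
        self.metadata = metadata or {}


class BaseDocumentTransformer(ABC):
    """Stand-in for the re-exported transformer interface."""

    @abstractmethod
    def transform_documents(self, documents: Sequence[Document]) -> Sequence[Document]:
        ...


class UppercaseTransformer(BaseDocumentTransformer):
    """Toy transformer implementing the interface."""

    def transform_documents(self, documents: Sequence[Document]) -> Sequence[Document]:
        return [Document(d.page_content.upper(), d.metadata) for d in documents]


docs = UppercaseTransformer().transform_documents([Document("hello world")])
print(docs[0].page_content)  # HELLO WORLD
```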
File renamed without changes.
@@ -3,9 +3,10 @@
import asyncio
from abc import ABC, abstractmethod
from functools import partial
from typing import Any, Sequence
from typing import TYPE_CHECKING, Any, Sequence

from langchain_core.documents import Document
if TYPE_CHECKING:
from langchain_core.documents import Document


class BaseDocumentTransformer(ABC):