Merge branch 'main' into fix/6296

langchain-ai · Aug 1, 2024 · b0921d9 · b0921d9
2 parents 2b576d3 + 2781a34
commit b0921d9
Show file tree

Hide file tree

Showing 134 changed files with 12,526 additions and 2,382 deletions.
diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml
@@ -4,12 +4,13 @@
 name: CI
 
 on:
-  push:
-    branches: ["main"]
   pull_request:
-    # Do not run this workflow if only docs changed.
-    paths-ignore:
+    paths-ignore:
       - 'docs/**'
+      - 'dependency_range_tests/**'
+      - 'environment_tests/**'
+      - '.github/**'
+      - '!.github/workflows/ci.yml'
   workflow_dispatch:  # Allows triggering the workflow manually in GitHub UI
 
 
@@ -38,25 +39,3 @@ jobs:
         run: yarn install --immutable --mode=skip-build
       - name: Check linting
         run: yarn run lint
-
-  test-exports:
-    uses:
-      ./.github/workflows/test-exports.yml
-    secrets: inherit
-
-  platform-compatibility:
-    runs-on: ${{ matrix.os }}
-    strategy:
-      matrix:
-        os: [ubuntu-latest, windows-latest, macos-latest]
-    steps:
-      - uses: actions/checkout@v4
-      - name: Use Node.js ${{ env.NODE_VERSION }}
-        uses: actions/setup-node@v3
-        with:
-          node-version: ${{ env.NODE_VERSION }}
-          cache: "yarn"
-      - name: Install dependencies
-        run: yarn install --immutable
-      - name: Build `@langchain/core`
-        run: yarn build --filter=@langchain/core
diff --git a/.github/workflows/compatibility.yml b/.github/workflows/compatibility.yml
@@ -4,10 +4,11 @@ on:
   push:
     branches: ["main"]
   pull_request:
-    # Do not run this workflow if only docs/examples changed.
-    paths-ignore:
-      - 'docs/**'
-      - 'examples/**'
+    # Only run this workflow if the following directories have changed.
+    paths:
+      - 'langchain/**'
+      - 'langchain-core/**'
+      - 'libs/**'
   workflow_dispatch:  # Allows triggering the workflow manually in GitHub UI
 
 # If another push to the same PR or branch happens while this workflow is still running,

diff --git a/.github/workflows/format.yml b/.github/workflows/format.yml
@@ -3,9 +3,9 @@
 name: Format
 
 on:
-  push:
-    branches: ["main"]
   pull_request:
+    paths-ignore:
+      - '.github/**'
   workflow_dispatch:  # Allows triggering the workflow manually in GitHub UI
 
 

diff --git a/.github/workflows/platform-compatibility.yml b/.github/workflows/platform-compatibility.yml
@@ -0,0 +1,44 @@
+# This workflow will do a clean installation of node dependencies, cache/restore them, and build `@langchain/core` across different operating systems
+# For more information see: https://docs.github.com/en/actions/automating-builds-and-tests/building-and-testing-nodejs
+
+name: Platform Compatibility
+
+on:
+  pull_request:
+    # Only run this workflow if the following directories have changed.
+    paths:
+      - 'langchain/**'
+      - 'langchain-core/**'
+      - 'libs/**'
+  workflow_dispatch:  # Allows triggering the workflow manually in GitHub UI
+
+
+# If another push to the same PR or branch happens while this workflow is still running,
+# cancel the earlier run in favor of the next run.
+#
+# There's no point in testing an outdated version of the code. GitHub only allows
+# a limited number of job runners to be active at the same time, so it's better to cancel
+# pointless jobs early so that more useful jobs can run sooner.
+concurrency:
+  group: ${{ github.workflow }}-${{ github.ref }}
+  cancel-in-progress: true
+
+jobs:
+  platform-compatibility:
+    runs-on: ${{ matrix.os }}
+    strategy:
+      matrix:
+        os: [ubuntu-latest, windows-latest, macos-latest]
+    steps:
+      - uses: actions/checkout@v4
+      - name: Use Node.js ${{ env.NODE_VERSION }}
+        uses: actions/setup-node@v3
+        with:
+          node-version: ${{ env.NODE_VERSION }}
+          cache: "yarn"
+      - name: Install dependencies
+        run: cd ./langchain-core && yarn workspaces focus
+      - name: Build @langchain/scripts
+        run: cd ./libs/langchain-scripts && yarn build:internal
+      - name: Build `@langchain/core`
+        run: cd ./langchain-core && yarn build:internal
diff --git a/.github/workflows/test-exports.yml b/.github/workflows/test-exports.yml
@@ -9,9 +9,7 @@ on:
       - 'libs/langchain-anthropic/**'
       - 'libs/langchain-community/**'
       - 'libs/langchain-openai/**'
-      - 'examples/**'
   workflow_dispatch: # Allows triggering the workflow manually in GitHub UI
-  workflow_call: # Allows triggering the workflow from another workflow
 
 # If another push to the same PR or branch happens while this workflow is still running,
 # cancel the earlier run in favor of the next run.

diff --git a/.github/workflows/validate_new_notebooks.yml b/.github/workflows/validate_new_notebooks.yml
@@ -0,0 +1,58 @@
+name: Validate new notebooks
+
+# If another push to the same PR or branch happens while this workflow is still running,
+# cancel the earlier run in favor of the next run.
+#
+# There's no point in testing an outdated version of the code. GitHub only allows
+# a limited number of job runners to be active at the same time, so it's better to cancel
+# pointless jobs early so that more useful jobs can run sooner.
+concurrency:
+  group: ${{ github.workflow }}-${{ github.ref }}
+  cancel-in-progress: true
+
+on:
+  push:
+    branches:
+      - main
+  pull_request:
+    branches:
+      - main
+    paths:
+      - 'docs/core_docs/**'
+  workflow_dispatch:
+
+jobs:
+  validate-new-notebooks:
+    runs-on: ubuntu-latest
+    steps:
+      - uses: actions/checkout@v4
+      - name: Use Node.js ${{ matrix.node-version }}
+        uses: actions/setup-node@v3
+        with:
+          node-version: ${{ matrix.node-version }}
+          cache: "yarn"
+      - name: Install dependencies
+        run: yarn install --immutable
+      - name: Get changed files
+        id: changed-files
+        uses: tj-actions/changed-files@v44
+      - name: Check for new or modified notebooks in docs/core_docs
+        id: check_notebooks
+        run: |
+          notebooks=$(echo '${{ steps.changed-files.outputs.all_changed_files }}' | tr ' ' '\n' | grep '^docs/core_docs/.*\.ipynb$' || true)
+          echo "Affected notebooks: $notebooks"
+          echo "has_affected_notebooks=$([ -n "$notebooks" ] && echo 'true' || echo 'false')" >> $GITHUB_OUTPUT
+      - name: Build examples
+        if: steps.check_notebooks.outputs.has_affected_notebooks == 'true'
+        run: yarn turbo:command build --filter=examples
+      - name: Validate affected notebooks in docs/core_docs
+        if: steps.check_notebooks.outputs.has_affected_notebooks == 'true'
+        run: |
+          notebooks=$(echo '${{ steps.changed-files.outputs.all_changed_files }}' | tr ' ' '\n' | grep '^docs/core_docs/.*\.ipynb$' || true)
+          if [ -n "$notebooks" ]; then
+            for notebook in $notebooks; do
+              yarn notebook:validate "$notebook"
+            done
+          else
+            echo "No notebooks in docs/core_docs to validate."
+          fi
diff --git a/.yarn/plugins/@yarnpkg/plugin-workspace-tools.cjs b/.yarn/plugins/@yarnpkg/plugin-workspace-tools.cjs
diff --git a/.yarnrc.yml b/.yarnrc.yml
@@ -3,6 +3,8 @@ nodeLinker: node-modules
 plugins:
   - path: .yarn/plugins/@yarnpkg/plugin-typescript.cjs
     spec: "@yarnpkg/plugin-typescript"
+  - path: .yarn/plugins/@yarnpkg/plugin-workspace-tools.cjs
+    spec: "@yarnpkg/plugin-workspace-tools"
 
 supportedArchitectures:
   cpu:

diff --git a/docs/core_docs/.gitignore b/docs/core_docs/.gitignore
@@ -20,6 +20,7 @@ src/supabase.d.ts
 npm-debug.log*
 yarn-debug.log*
 yarn-error.log*
+/scripts/tmp
 
 # ESLint
 .eslintcache
@@ -33,8 +34,6 @@ yarn-error.log*
 
 /.quarto/
 # AUTO_GENERATED_DOCS
-docs/tutorials/test.md
-docs/tutorials/test.mdx
 docs/tutorials/rag.md
 docs/tutorials/rag.mdx
 docs/tutorials/query_analysis.md
@@ -210,4 +209,28 @@ docs/how_to/assign.mdx
 docs/how_to/agent_executor.md
 docs/how_to/agent_executor.mdx
 docs/integrations/llms/mistral.md
-docs/integrations/llms/mistral.mdx
+docs/integrations/llms/mistral.mdx
+docs/integrations/chat/togetherai.md
+docs/integrations/chat/togetherai.mdx
+docs/integrations/chat/openai.md
+docs/integrations/chat/openai.mdx
+docs/integrations/chat/ollama.md
+docs/integrations/chat/ollama.mdx
+docs/integrations/chat/mistral.md
+docs/integrations/chat/mistral.mdx
+docs/integrations/chat/groq.md
+docs/integrations/chat/groq.mdx
+docs/integrations/chat/google_vertex_ai.md
+docs/integrations/chat/google_vertex_ai.mdx
+docs/integrations/chat/google_generativeai.md
+docs/integrations/chat/google_generativeai.mdx
+docs/integrations/chat/fireworks.md
+docs/integrations/chat/fireworks.mdx
+docs/integrations/chat/cohere.md
+docs/integrations/chat/cohere.mdx
+docs/integrations/chat/azure.md
+docs/integrations/chat/azure.mdx
+docs/integrations/chat/anthropic.md
+docs/integrations/chat/anthropic.mdx
+docs/integrations/document_loaders/web_loaders/web_cheerio.md
+docs/integrations/document_loaders/web_loaders/web_cheerio.mdx
diff --git a/docs/core_docs/README.md b/docs/core_docs/README.md
@@ -47,3 +47,11 @@ Some common defaults for linting/formatting have been set for you. If you integr
 ```
 $ yarn ci
 ```
+
+### Validating Notebooks
+
+You can validate that notebooks build and compile TypeScript using the following command:
+
+```bash
+$ yarn validate <PATH_TO_FILE>
+```
diff --git a/docs/core_docs/docs/concepts.mdx b/docs/core_docs/docs/concepts.mdx
@@ -513,6 +513,29 @@ Retrievers accept a string query as input and return an array of `Document`s as
 
 For specifics on how to use retrievers, see the [relevant how-to guides here](/docs/how_to/#retrievers).
 
+### Key-value stores
+
+For some techniques, such as [indexing and retrieval with multiple vectors per document](/docs/how_to/multi_vector/), having some sort of key-value (KV) storage is helpful.
+
+LangChain includes a [`BaseStore`](https://api.js.langchain.com/classes/langchain_core_stores.BaseStore.html) interface,
+which allows for storage of arbitrary data. However, LangChain components that require KV-storage accept a
+more specific `BaseStore<string, Uint8Array>` instance that stores binary data (referred to as a `ByteStore`), and internally take care of
+encoding and decoding data for their specific needs.
+
+This means that as a user, you only need to think about one type of store rather than different ones for different types of data.
+
+#### Interface
+
+All [`BaseStores`](https://api.js.langchain.com/classes/langchain_core_stores.BaseStore.html) support the following interface. Note that the interface allows
+for modifying **multiple** key-value pairs at once:
+
+- `mget(keys: string[]): Promise<(undefined | Uint8Array)[]>`: get the contents of multiple keys, returning `undefined` for any key that does not exist
+- `mset(keyValuePairs: [string, Uint8Array][]): Promise<void>`: set the contents of multiple keys
+- `mdelete(keys: string[]): Promise<void>`: delete multiple keys
+- `yieldKeys(prefix?: string): AsyncGenerator<string>`: yield all keys in the store, optionally filtering by a prefix
+
+For key-value store implementations, see [this section](/docs/integrations/stores/).
+
 ### Tools
 
 <span data-heading-keywords="tool,tools"></span>

diff --git a/docs/core_docs/docs/how_to/callbacks_custom_events.ipynb b/docs/core_docs/docs/how_to/callbacks_custom_events.ipynb
@@ -46,7 +46,7 @@
     ":::caution Compatibility\n",
     "Dispatching custom callback events requires `@langchain/core>=0.2.16`. See [this guide](/docs/how_to/installation/#installing-integration-packages) for some considerations to take when upgrading `@langchain/core`.\n",
     "\n",
-    "The default entrypoint below triggers an import and initialization of [`async_hooks`](https://nodejs.org/api/async_hooks.html) to enable automatic `RunnableConfig` passing, which is not supported in all environments. If you see import issues, you must import from `@langchain/callbacks/dispatch/web` and propagate the `RunnableConfig` object manually (see example below).\n",
+    "The default entrypoint below triggers an import and initialization of [`async_hooks`](https://nodejs.org/api/async_hooks.html) to enable automatic `RunnableConfig` passing, which is not supported in all environments. If you see import issues, you must import from `@langchain/core/callbacks/dispatch/web` and propagate the `RunnableConfig` object manually (see example below).\n",
     ":::\n",
     "```"
    ]

diff --git a/docs/core_docs/docs/how_to/graph_semantic.ipynb b/docs/core_docs/docs/how_to/graph_semantic.ipynb
@@ -180,24 +180,18 @@
       "metadata": {},
       "outputs": [],
       "source": [
-        "import { StructuredTool } from \"@langchain/core/tools\";\n",
+        "import { tool } from \"@langchain/core/tools\";\n",
         "import { z } from \"zod\";\n",
         "\n",
-        "const informationInput = z.object({\n",
-        "    entity: z.string().describe(\"movie or a person mentioned in the question\"),\n",
-        "});\n",
-        "\n",
-        "class InformationTool extends StructuredTool {\n",
-        "    schema = informationInput;\n",
-        "\n",
-        "    name = \"Information\";\n",
-        "\n",
-        "    description = \"useful for when you need to answer questions about various actors or movies\";\n",
-        "\n",
-        "    async _call(input: z.infer<typeof informationInput>): Promise<string> {\n",
-        "        return getInformation(input.entity);\n",
-        "    }\n",
-        "}"
+        "const informationTool = tool((input) => {\n",
+        "    return getInformation(input.entity);\n",
+        "}, {\n",
+        "    name: \"Information\",\n",
+        "    description: \"useful for when you need to answer questions about various actors or movies\",\n",
+        "    schema: z.object({\n",
+        "        entity: z.string().describe(\"movie or a person mentioned in the question\"),\n",
+        "    }),\n",
+        "});"
       ]
     },
     {
@@ -227,7 +221,7 @@
         "import { RunnableSequence } from \"@langchain/core/runnables\";\n",
         "\n",
         "const llm = new ChatOpenAI({ model: \"gpt-3.5-turbo\", temperature: 0 })\n",
-        "const tools = [new InformationTool()]\n",
+        "const tools = [informationTool]\n",
         "\n",
         "const llmWithTools = llm.bind({\n",
         "    functions: tools.map(convertToOpenAIFunction),\n",

diff --git a/docs/core_docs/docs/how_to/streaming.ipynb b/docs/core_docs/docs/how_to/streaming.ipynb
@@ -70,6 +70,21 @@
     "```"
    ]
   },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "// @ls-docs-hide-cell\n",
+    "import { ChatOpenAI } from \"@langchain/openai\";\n",
+    "\n",
+    "const model = new ChatOpenAI({\n",
+    "  model: \"gpt-4o\",\n",
+    "  temperature: 0,\n",
+    "});"
+   ]
+  },
   {
    "cell_type": "code",
    "execution_count": 3,

diff --git a/docs/core_docs/docs/how_to/tool_calls_multimodal.ipynb b/docs/core_docs/docs/how_to/tool_calls_multimodal.ipynb
@@ -271,22 +271,17 @@
    ],
    "source": [
     "import { SystemMessage } from \"@langchain/core/messages\";\n",
-    "import { StructuredTool } from \"@langchain/core/tools\";\n",
-    "\n",
-    "class SummaryTool extends StructuredTool {\n",
-    "  schema = z.object({\n",
-    "      summary: z.string().describe(\"The summary of the content to log\")\n",
-    "  })\n",
-    "\n",
-    "  description = \"Log the summary of the content\"\n",
-    "\n",
-    "  name = \"summary_tool\"\n",
+    "import { tool } from \"@langchain/core/tools\";\n",
     "\n",
-    "  async _call(input: z.infer<typeof this.schema>) {\n",
-    "      return input.summary\n",
-    "  }\n",
-    "}\n",
-    "const summaryTool = new SummaryTool()\n",
+    "const summaryTool = tool((input) => {\n",
+    "  return input.summary;\n",
+    "}, {\n",
+    "  name: \"summary_tool\",\n",
+    "  description: \"Log the summary of the content\",\n",
+    "  schema: z.object({\n",
+    "    summary: z.string().describe(\"The summary of the content to log\")\n",
+    "  }),\n",
+    "});\n",
     "\n",
     "const audioUrl = \"https://www.pacdv.com/sounds/people_sound_effects/applause-1.wav\";\n",
     "\n",