diff --git a/deno.json b/deno.json index 35e488d9e0e0..367aeecd135f 100644 --- a/deno.json +++ b/deno.json @@ -22,7 +22,6 @@ "zod": "npm:/zod", "zod-to-json-schema": "npm:/zod-to-json-schema", "node-llama-cpp": "npm:/node-llama-cpp", - "ml-distance": "npm:/ml-distance", "pdf-parse": "npm:/pdf-parse", "peggy": "npm:/peggy", "readline": "https://deno.land/x/readline@v1.1.0/mod.ts", @@ -30,6 +29,14 @@ "youtubei.js": "npm:/youtubei.js", "youtube-transcript": "npm:/youtube-transcript", "neo4j-driver": "npm:/neo4j-driver", - "axios": "npm:/axios" + "axios": "npm:/axios", + "@mendable/firecrawl-js": "npm:/@mendable/firecrawl-js", + "@aws-crypto/sha256-js": "npm:/@aws-crypto/sha256-js", + "@aws-sdk/credential-provider-node": "npm:/@aws-sdk/credential-provider-node", + "@smithy/protocol-http": "npm:/@smithy/protocol-http", + "@smithy/signature-v4": "npm:/@smithy/signature-v4", + "@smithy/eventstream-codec": "npm:/@smithy/eventstream-codec", + "@smithy/util-utf8": "npm:/@smithy/util-utf8", + "@aws-sdk/types": "npm:/@aws-sdk/types" } } \ No newline at end of file diff --git a/docs/core_docs/.gitignore b/docs/core_docs/.gitignore index df8abd01615f..3b2f9ca94e78 100644 --- a/docs/core_docs/.gitignore +++ b/docs/core_docs/.gitignore @@ -34,6 +34,26 @@ yarn-error.log* /.quarto/ # AUTO_GENERATED_DOCS +docs/tutorials/rag.md +docs/tutorials/rag.mdx +docs/tutorials/query_analysis.md +docs/tutorials/query_analysis.mdx +docs/tutorials/qa_chat_history.md +docs/tutorials/qa_chat_history.mdx +docs/tutorials/pdf_qa.md +docs/tutorials/pdf_qa.mdx +docs/tutorials/local_rag.md +docs/tutorials/local_rag.mdx +docs/tutorials/llm_chain.md +docs/tutorials/llm_chain.mdx +docs/tutorials/graph.md +docs/tutorials/graph.mdx +docs/tutorials/extraction.md +docs/tutorials/extraction.mdx +docs/tutorials/classification.md +docs/tutorials/classification.mdx +docs/tutorials/chatbot.md +docs/tutorials/chatbot.mdx docs/how_to/trim_messages.md docs/how_to/trim_messages.mdx docs/how_to/tools_prompting.md @@ 
-188,27 +208,29 @@ docs/how_to/assign.md docs/how_to/assign.mdx docs/how_to/agent_executor.md docs/how_to/agent_executor.mdx -docs/tutorials/rag.md -docs/tutorials/rag.mdx -docs/tutorials/query_analysis.md -docs/tutorials/query_analysis.mdx -docs/tutorials/qa_chat_history.md -docs/tutorials/qa_chat_history.mdx -docs/tutorials/pdf_qa.md -docs/tutorials/pdf_qa.mdx -docs/tutorials/local_rag.md -docs/tutorials/local_rag.mdx -docs/tutorials/llm_chain.md -docs/tutorials/llm_chain.mdx -docs/tutorials/graph.md -docs/tutorials/graph.mdx -docs/tutorials/extraction.md -docs/tutorials/extraction.mdx -docs/tutorials/classification.md -docs/tutorials/classification.mdx -docs/tutorials/chatbot.md -docs/tutorials/chatbot.mdx docs/integrations/llms/mistral.md docs/integrations/llms/mistral.mdx +docs/integrations/chat/togetherai.md +docs/integrations/chat/togetherai.mdx +docs/integrations/chat/openai.md +docs/integrations/chat/openai.mdx +docs/integrations/chat/ollama.md +docs/integrations/chat/ollama.mdx docs/integrations/chat/mistral.md -docs/integrations/chat/mistral.mdx \ No newline at end of file +docs/integrations/chat/mistral.mdx +docs/integrations/chat/groq.md +docs/integrations/chat/groq.mdx +docs/integrations/chat/google_vertex_ai.md +docs/integrations/chat/google_vertex_ai.mdx +docs/integrations/chat/google_generativeai.md +docs/integrations/chat/google_generativeai.mdx +docs/integrations/chat/fireworks.md +docs/integrations/chat/fireworks.mdx +docs/integrations/chat/cohere.md +docs/integrations/chat/cohere.mdx +docs/integrations/chat/azure.md +docs/integrations/chat/azure.mdx +docs/integrations/chat/anthropic.md +docs/integrations/chat/anthropic.mdx +docs/integrations/document_loaders/web_loaders/web_cheerio.md +docs/integrations/document_loaders/web_loaders/web_cheerio.mdx \ No newline at end of file diff --git a/docs/core_docs/docs/integrations/chat/google_generativeai.ipynb b/docs/core_docs/docs/integrations/chat/google_generativeai.ipynb index 
af47303b2917..192339ddce01 100644 --- a/docs/core_docs/docs/integrations/chat/google_generativeai.ipynb +++ b/docs/core_docs/docs/integrations/chat/google_generativeai.ipynb @@ -326,23 +326,19 @@ "```{=mdx}\n", "\n", ":::caution\n", + "\n", "The Google GenerativeAI API does not allow tool schemas to contain an object with unknown properties.\n", "\n", - "For example, the following Zod schema will throw an error:\n", + "For example, the following Zod schemas will throw an error:\n", "\n", - "```typescript\n", - "const schema = z.object({\n", - " properties: z.record(z.unknown()), // Not allowed\n", - "});\n", - "```\n", + "`const invalidSchema = z.object({ properties: z.record(z.unknown()) });`\n", "\n", - "or\n", + "and\n", "\n", - "```typescript\n", - "const schema = z.record(z.unknown()); // Not allowed\n", - "```\n", + "`const invalidSchema2 = z.record(z.unknown());`\n", "\n", "Instead, you should explicitly define the properties of the object field.\n", + "\n", ":::\n", "\n", "```\n" diff --git a/docs/core_docs/docs/integrations/document_loaders/file_loaders/csv.ipynb b/docs/core_docs/docs/integrations/document_loaders/file_loaders/csv.ipynb new file mode 100644 index 000000000000..5f0f34c143d5 --- /dev/null +++ b/docs/core_docs/docs/integrations/document_loaders/file_loaders/csv.ipynb @@ -0,0 +1,226 @@ +{ + "cells": [ + { + "cell_type": "raw", + "metadata": { + "vscode": { + "languageId": "raw" + } + }, + "source": [ + "---\n", + "sidebar_label: CSV\n", + "sidebar_class_name: node-only\n", + "---" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# CSVLoader\n", + "\n", + "```{=mdx}\n", + "\n", + ":::tip Compatibility\n", + "\n", + "Only available on Node.js.\n", + "\n", + ":::\n", + "\n", + "```\n", + "\n", + "This notebook provides a quick overview for getting started with `CSVLoader` [document loaders](/docs/concepts/#document-loaders). 
For detailed documentation of all `CSVLoader` features and configurations head to the [API reference](https://api.js.langchain.com/classes/langchain_community_document_loaders_fs_csv.CSVLoader.html).\n", + "\n", + "This example goes over how to load data from CSV files. The second argument is the `column` name to extract from the CSV file. One document will be created for each row in the CSV file. When `column` is not specified, each row is converted into a key/value pair with each key/value pair outputted to a new line in the document's `pageContent`. When `column` is specified, one document is created for each row, and the value of the specified column is used as the document's `pageContent`.\n", + "\n", + "## Overview\n", + "### Integration details\n", + "\n", + "| Class | Package | Compatibility | Local | [PY support](https://python.langchain.com/docs/integrations/document_loaders/csv)| \n", + "| :--- | :--- | :---: | :---: | :---: |\n", + "| [CSVLoader](https://api.js.langchain.com/classes/langchain_community_document_loaders_fs_csv.CSVLoader.html) | [@langchain/community](https://api.js.langchain.com/modules/langchain_community_document_loaders_fs_csv.html) | Node-only | ✅ | ✅ |\n", + "\n", + "## Setup\n", + "\n", + "To access `CSVLoader` document loader you'll need to install the `@langchain/community` integration, along with the `d3-dsv@2` peer dependency.\n", + "\n", + "### Installation\n", + "\n", + "The LangChain CSVLoader integration lives in the `@langchain/community` integration package.\n", + "\n", + "```{=mdx}\n", + "import IntegrationInstallTooltip from \"@mdx_components/integration_install_tooltip.mdx\";\n", + "import Npm2Yarn from \"@theme/Npm2Yarn\";\n", + "\n", + "\n", + "\n", + "\n", + " @langchain/community d3-dsv@2\n", + "\n", + "\n", + "```" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Instantiation\n", + "\n", + "Now we can instantiate our model object and load documents:" + ] + }, + { + "cell_type": 
"code", + "execution_count": 8, + "metadata": {}, + "outputs": [], + "source": [ + "import { CSVLoader } from \"@langchain/community/document_loaders/fs/csv\"\n", + "\n", + "const exampleCsvPath = \"../../../../../../langchain/src/document_loaders/tests/example_data/example_separator.csv\";\n", + "\n", + "const loader = new CSVLoader(exampleCsvPath)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Load" + ] + }, + { + "cell_type": "code", + "execution_count": 6, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Document {\n", + " pageContent: 'id|html: 1|\"Corruption discovered at the core of the Banking Clan!\"',\n", + " metadata: {\n", + " source: '../../../../../../langchain/src/document_loaders/tests/example_data/example_separator.csv',\n", + " line: 1\n", + " },\n", + " id: undefined\n", + "}\n" + ] + } + ], + "source": [ + "const docs = await loader.load()\n", + "docs[0]" + ] + }, + { + "cell_type": "code", + "execution_count": 7, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "{\n", + " source: '../../../../../../langchain/src/document_loaders/tests/example_data/example_separator.csv',\n", + " line: 1\n", + "}\n" + ] + } + ], + "source": [ + "console.log(docs[0].metadata)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Usage, extracting a single column\n", + "\n", + "Example CSV file:\n", + "\n", + "```csv\n", + "id|html\n", + "1|\"Corruption discovered at the core of the Banking Clan!\"\n", + "2|\"Reunited, Rush Clovis and Senator Amidala\"\n", + "3|\"discover the full extent of the deception.\"\n", + "4|\"Anakin Skywalker is sent to the rescue!\"\n", + "```" + ] + }, + { + "cell_type": "code", + "execution_count": 14, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Document {\n", + " pageContent: 'Corruption discovered at the core of the Banking 
Clan!',\n", + " metadata: {\n", + " source: '../../../../../../langchain/src/document_loaders/tests/example_data/example_separator.csv',\n", + " line: 1\n", + " },\n", + " id: undefined\n", + "}\n" + ] + } + ], + "source": [ + "import { CSVLoader } from \"@langchain/community/document_loaders/fs/csv\";\n", + "\n", + "const singleColumnLoader = new CSVLoader(\n", + " exampleCsvPath,\n", + " {\n", + " column: \"html\",\n", + " separator:\"|\"\n", + " }\n", + ");\n", + "\n", + "const singleColumnDocs = await singleColumnLoader.load();\n", + "console.log(singleColumnDocs[0]);" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## API reference\n", + "\n", + "For detailed documentation of all CSVLoader features and configurations head to the API reference: https://api.js.langchain.com/classes/langchain_community_document_loaders_fs_csv.CSVLoader.html" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [] + } + ], + "metadata": { + "kernelspec": { + "display_name": "TypeScript", + "language": "typescript", + "name": "tslab" + }, + "language_info": { + "codemirror_mode": { + "mode": "typescript", + "name": "javascript", + "typescript": true + }, + "file_extension": ".ts", + "mimetype": "text/typescript", + "name": "typescript", + "version": "3.7.2" + } + }, + "nbformat": 4, + "nbformat_minor": 4 +} diff --git a/docs/core_docs/docs/integrations/document_loaders/file_loaders/csv.mdx b/docs/core_docs/docs/integrations/document_loaders/file_loaders/csv.mdx deleted file mode 100644 index e9adf18540a8..000000000000 --- a/docs/core_docs/docs/integrations/document_loaders/file_loaders/csv.mdx +++ /dev/null @@ -1,90 +0,0 @@ -# CSV files - -This example goes over how to load data from CSV files. The second argument is the `column` name to extract from the CSV file. One document will be created for each row in the CSV file. 
When `column` is not specified, each row is converted into a key/value pair with each key/value pair outputted to a new line in the document's `pageContent`. When `column` is specified, one document is created for each row, and the value of the specified column is used as the document's pageContent. - -## Setup - -```bash npm2yarn -npm install d3-dsv@2 -``` - -## Usage, extracting all columns - -Example CSV file: - -```csv -id,text -1,This is a sentence. -2,This is another sentence. -``` - -Example code: - -```typescript -import { CSVLoader } from "@langchain/community/document_loaders/fs/csv"; - -const loader = new CSVLoader("src/document_loaders/example_data/example.csv"); - -const docs = await loader.load(); -/* -[ - Document { - "metadata": { - "line": 1, - "source": "src/document_loaders/example_data/example.csv", - }, - "pageContent": "id: 1 -text: This is a sentence.", - }, - Document { - "metadata": { - "line": 2, - "source": "src/document_loaders/example_data/example.csv", - }, - "pageContent": "id: 2 -text: This is another sentence.", - }, -] -*/ -``` - -## Usage, extracting a single column - -Example CSV file: - -```csv -id,text -1,This is a sentence. -2,This is another sentence. 
-``` - -Example code: - -```typescript -import { CSVLoader } from "@langchain/community/document_loaders/fs/csv"; - -const loader = new CSVLoader( - "src/document_loaders/example_data/example.csv", - "text" -); - -const docs = await loader.load(); -/* -[ - Document { - "metadata": { - "line": 1, - "source": "src/document_loaders/example_data/example.csv", - }, - "pageContent": "This is a sentence.", - }, - Document { - "metadata": { - "line": 2, - "source": "src/document_loaders/example_data/example.csv", - }, - "pageContent": "This is another sentence.", - }, -] -*/ -``` diff --git a/docs/core_docs/docs/integrations/document_loaders/file_loaders/directory.ipynb b/docs/core_docs/docs/integrations/document_loaders/file_loaders/directory.ipynb new file mode 100644 index 000000000000..3d19d94677d2 --- /dev/null +++ b/docs/core_docs/docs/integrations/document_loaders/file_loaders/directory.ipynb @@ -0,0 +1,192 @@ +{ + "cells": [ + { + "cell_type": "raw", + "metadata": {}, + "source": [ + "---\n", + "sidebar_label: DirectoryLoader\n", + "sidebar_class_name: node-only\n", + "---" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# DirectoryLoader\n", + "\n", + "```{=mdx}\n", + "\n", + ":::tip Compatibility\n", + "\n", + "Only available on Node.js.\n", + "\n", + ":::\n", + "\n", + "```\n", + "\n", + "This notebook provides a quick overview for getting started with `DirectoryLoader` [document loaders](/docs/concepts/#document-loaders). For detailed documentation of all `DirectoryLoader` features and configurations head to the [API reference](https://api.js.langchain.com/classes/langchain_document_loaders_fs_directory.DirectoryLoader.html).\n", + "\n", + "This example goes over how to load data from folders with multiple files. The second argument is a map of file extensions to loader factories. 
Each file will be passed to the matching loader, and the resulting documents will be concatenated together.\n", + "\n", + "Example folder:\n", + "\n", + "```text\n", + "src/document_loaders/example_data/example/\n", + "├── example.json\n", + "├── example.jsonl\n", + "├── example.txt\n", + "└── example.csv\n", + "```\n", + "\n", + "## Overview\n", + "### Integration details\n", + "\n", + "| Class | Package | Compatibility | Local | PY support | \n", + "| :--- | :--- | :---: | :---: | :---: |\n", + "| [DirectoryLoader](https://api.js.langchain.com/classes/langchain_document_loaders_fs_directory.DirectoryLoader.html) | [langchain](https://api.js.langchain.com/modules/langchain_document_loaders_fs_directory.html) | Node-only | ✅ | ✅ |\n", + "\n", + "## Setup\n", + "\n", + "To access `DirectoryLoader` document loader you'll need to install the `langchain` package.\n", + "\n", + "### Installation\n", + "\n", + "The LangChain DirectoryLoader integration lives in the `langchain` package:\n", + "\n", + "```{=mdx}\n", + "import IntegrationInstallTooltip from \"@mdx_components/integration_install_tooltip.mdx\";\n", + "import Npm2Yarn from \"@theme/Npm2Yarn\";\n", + "\n", + "\n", + "\n", + "\n", + " langchain\n", + "\n", + "\n", + "```" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Instantiation\n", + "\n", + "Now we can instantiate our model object and load documents:" + ] + }, + { + "cell_type": "code", + "execution_count": 1, + "metadata": {}, + "outputs": [], + "source": [ + "import { DirectoryLoader } from \"langchain/document_loaders/fs/directory\";\n", + "import {\n", + " JSONLoader,\n", + " JSONLinesLoader,\n", + "} from \"langchain/document_loaders/fs/json\";\n", + "import { TextLoader } from \"langchain/document_loaders/fs/text\";\n", + "import { CSVLoader } from \"@langchain/community/document_loaders/fs/csv\";\n", + "\n", + "const loader = new DirectoryLoader(\n", + " 
\"../../../../../../examples/src/document_loaders/example_data\",\n", + " {\n", + " \".json\": (path) => new JSONLoader(path, \"/texts\"),\n", + " \".jsonl\": (path) => new JSONLinesLoader(path, \"/html\"),\n", + " \".txt\": (path) => new TextLoader(path),\n", + " \".csv\": (path) => new CSVLoader(path, \"text\"),\n", + " }\n", + ");" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Load" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Document {\n", + " pageContent: 'Foo\\nBar\\nBaz\\n\\n',\n", + " metadata: {\n", + " source: '/Users/bracesproul/code/lang-chain-ai/langchainjs/examples/src/document_loaders/example_data/example.txt'\n", + " },\n", + " id: undefined\n", + "}\n" + ] + } + ], + "source": [ + "const docs = await loader.load()\n", + "// disable console.warn calls\n", + "console.warn = () => {}\n", + "docs[0]" + ] + }, + { + "cell_type": "code", + "execution_count": 7, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "{\n", + " source: '/Users/bracesproul/code/lang-chain-ai/langchainjs/examples/src/document_loaders/example_data/example.txt'\n", + "}\n" + ] + } + ], + "source": [ + "console.log(docs[0].metadata)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## API reference\n", + "\n", + "For detailed documentation of all DirectoryLoader features and configurations head to the API reference: https://api.js.langchain.com/classes/langchain_document_loaders_fs_directory.DirectoryLoader.html" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [] + } + ], + "metadata": { + "kernelspec": { + "display_name": "TypeScript", + "language": "typescript", + "name": "tslab" + }, + "language_info": { + "codemirror_mode": { + "mode": "typescript", + "name": "javascript", + "typescript": true + }, + "file_extension": ".ts", + "mimetype": 
"text/typescript", + "name": "typescript", + "version": "3.7.2" + } + }, + "nbformat": 4, + "nbformat_minor": 4 +} diff --git a/docs/core_docs/docs/integrations/document_loaders/file_loaders/directory.mdx b/docs/core_docs/docs/integrations/document_loaders/file_loaders/directory.mdx deleted file mode 100644 index a0c3f67ad700..000000000000 --- a/docs/core_docs/docs/integrations/document_loaders/file_loaders/directory.mdx +++ /dev/null @@ -1,42 +0,0 @@ ---- -sidebar_position: 1 -hide_table_of_contents: true ---- - -# Folders with multiple files - -This example goes over how to load data from folders with multiple files. The second argument is a map of file extensions to loader factories. Each file will be passed to the matching loader, and the resulting documents will be concatenated together. - -Example folder: - -```text -src/document_loaders/example_data/example/ -├── example.json -├── example.jsonl -├── example.txt -└── example.csv -``` - -Example code: - -```typescript -import { DirectoryLoader } from "langchain/document_loaders/fs/directory"; -import { - JSONLoader, - JSONLinesLoader, -} from "langchain/document_loaders/fs/json"; -import { TextLoader } from "langchain/document_loaders/fs/text"; -import { CSVLoader } from "@langchain/community/document_loaders/fs/csv"; - -const loader = new DirectoryLoader( - "src/document_loaders/example_data/example", - { - ".json": (path) => new JSONLoader(path, "/texts"), - ".jsonl": (path) => new JSONLinesLoader(path, "/html"), - ".txt": (path) => new TextLoader(path), - ".csv": (path) => new CSVLoader(path, "text"), - } -); -const docs = await loader.load(); -console.log({ docs }); -``` diff --git a/docs/core_docs/docs/integrations/document_loaders/file_loaders/pdf.ipynb b/docs/core_docs/docs/integrations/document_loaders/file_loaders/pdf.ipynb new file mode 100644 index 000000000000..ac0092586134 --- /dev/null +++ b/docs/core_docs/docs/integrations/document_loaders/file_loaders/pdf.ipynb @@ -0,0 +1,502 @@ +{ + "cells": [ 
+ { + "cell_type": "raw", + "metadata": {}, + "source": [ + "---\n", + "sidebar_label: PDFLoader\n", + "sidebar_class_name: node-only\n", + "---" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# PDFLoader\n", + "\n", + "```{=mdx}\n", + "\n", + ":::tip Compatibility\n", + "\n", + "Only available on Node.js.\n", + "\n", + ":::\n", + "\n", + "```\n", + "\n", + "This notebook provides a quick overview for getting started with `PDFLoader` [document loaders](/docs/concepts/#document-loaders). For detailed documentation of all `PDFLoader` features and configurations head to the [API reference](https://api.js.langchain.com/classes/langchain_community_document_loaders_fs_pdf.PDFLoader.html).\n", + "\n", + "## Overview\n", + "### Integration details\n", + "\n", + "| Class | Package | Compatibility | Local | PY support | \n", + "| :--- | :--- | :---: | :---: | :---: |\n", + "| [PDFLoader](https://api.js.langchain.com/classes/langchain_community_document_loaders_fs_pdf.PDFLoader.html) | [@langchain/community](https://api.js.langchain.com/modules/langchain_community_document_loaders_fs_pdf.html) | Node-only | ✅ | 🟠 (See note below) |\n", + "\n", + "> The Python package has many PDF loaders to choose from. 
See [this link](https://python.langchain.com/docs/integrations/document_loaders/) for a full list of Python document loaders.\n", + "\n", + "## Setup\n", + "\n", + "To access `PDFLoader` document loader you'll need to install the `@langchain/community` integration, along with the `pdf-parse` package.\n", + "\n", + "### Credentials\n", + "\n", + "### Installation\n", + "\n", + "The LangChain PDFLoader integration lives in the `@langchain/community` package:\n", + "\n", + "```{=mdx}\n", + "import IntegrationInstallTooltip from \"@mdx_components/integration_install_tooltip.mdx\";\n", + "import Npm2Yarn from \"@theme/Npm2Yarn\";\n", + "\n", + "\n", + "\n", + "\n", + " @langchain/community pdf-parse\n", + "\n", + "\n", + "```" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Instantiation\n", + "\n", + "Now we can instantiate our model object and load documents:" + ] + }, + { + "cell_type": "code", + "execution_count": 10, + "metadata": {}, + "outputs": [], + "source": [ + "import { PDFLoader } from \"@langchain/community/document_loaders/fs/pdf\"\n", + "\n", + "const nike10kPdfPath = \"../../../../data/nke-10k-2023.pdf\"\n", + "\n", + "const loader = new PDFLoader(nike10kPdfPath)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Load" + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Document {\n", + " pageContent: 'Table of Contents\\n' +\n", + " 'UNITED STATES\\n' +\n", + " 'SECURITIES AND EXCHANGE COMMISSION\\n' +\n", + " 'Washington, D.C. 
20549\\n' +\n", + " 'FORM 10-K\\n' +\n", + " '(Mark One)\\n' +\n", + " '☑ ANNUAL REPORT PURSUANT TO SECTION 13 OR 15(D) OF THE SECURITIES EXCHANGE ACT OF 1934\\n' +\n", + " 'FOR THE FISCAL YEAR ENDED MAY 31, 2023\\n' +\n", + " 'OR\\n' +\n", + " '☐ TRANSITION REPORT PURSUANT TO SECTION 13 OR 15(D) OF THE SECURITIES EXCHANGE ACT OF 1934\\n' +\n", + " 'FOR THE TRANSITION PERIOD FROM TO .\\n' +\n", + " 'Commission File No. 1-10635\\n' +\n", + " 'NIKE, Inc.\\n' +\n", + " '(Exact name of Registrant as specified in its charter)\\n' +\n", + " 'Oregon93-0584541\\n' +\n", + " '(State or other jurisdiction of incorporation)(IRS Employer Identification No.)\\n' +\n", + " 'One Bowerman Drive, Beaverton, Oregon 97005-6453\\n' +\n", + " '(Address of principal executive offices and zip code)\\n' +\n", + " '(503) 671-6453\\n' +\n", + " \"(Registrant's telephone number, including area code)\\n\" +\n", + " 'SECURITIES REGISTERED PURSUANT TO SECTION 12(B) OF THE ACT:\\n' +\n", + " 'Class B Common StockNKENew York Stock Exchange\\n' +\n", + " '(Title of each class)(Trading symbol)(Name of each exchange on which registered)\\n' +\n", + " 'SECURITIES REGISTERED PURSUANT TO SECTION 12(G) OF THE ACT:\\n' +\n", + " 'NONE\\n' +\n", + " 'Indicate by check mark:YESNO\\n' +\n", + " '•if the registrant is a well-known seasoned issuer, as defined in Rule 405 of the Securities Act.þ ̈\\n' +\n", + " '•if the registrant is not required to file reports pursuant to Section 13 or Section 15(d) of the Act. 
̈þ\\n' +\n", + " '•whether the registrant (1) has filed all reports required to be filed by Section 13 or 15(d) of the Securities Exchange Act of 1934 during the preceding\\n' +\n", + " '12 months (or for such shorter period that the registrant was required to file such reports), and (2) has been subject to such filing requirements for the\\n' +\n", + " 'past 90 days.\\n' +\n", + " 'þ ̈\\n' +\n", + " '•whether the registrant has submitted electronically every Interactive Data File required to be submitted pursuant to Rule 405 of Regulation S-T\\n' +\n", + " '(§232.405 of this chapter) during the preceding 12 months (or for such shorter period that the registrant was required to submit such files).\\n' +\n", + " 'þ ̈\\n' +\n", + " '•whether the registrant is a large accelerated filer, an accelerated filer, a non-accelerated filer, a smaller reporting company or an emerging growth company. See the definitions of “large accelerated filer,”\\n' +\n", + " '“accelerated filer,” “smaller reporting company,” and “emerging growth company” in Rule 12b-2 of the Exchange Act.\\n' +\n", + " 'Large accelerated filerþAccelerated filer☐Non-accelerated filer☐Smaller reporting company☐Emerging growth company☐\\n' +\n", + " '•if an emerging growth company, if the registrant has elected not to use the extended transition period for complying with any new or revised financial\\n' +\n", + " 'accounting standards provided pursuant to Section 13(a) of the Exchange Act.\\n' +\n", + " ' ̈\\n' +\n", + " \"•whether the registrant has filed a report on and attestation to its management's assessment of the effectiveness of its internal control over financial\\n\" +\n", + " 'reporting under Section 404(b) of the Sarbanes-Oxley Act (15 U.S.C. 
7262(b)) by the registered public accounting firm that prepared or issued its audit\\n' +\n", + " 'report.\\n' +\n", + " 'þ\\n' +\n", + " '•if securities are registered pursuant to Section 12(b) of the Act, whether the financial statements of the registrant included in the filing reflect the\\n' +\n", + " 'correction of an error to previously issued financial statements.\\n' +\n", + " ' ̈\\n' +\n", + " '•whether any of those error corrections are restatements that required a recovery analysis of incentive-based compensation received by any of the\\n' +\n", + " \"registrant's executive officers during the relevant recovery period pursuant to § 240.10D-1(b).\\n\" +\n", + " ' ̈\\n' +\n", + " '•\\n' +\n", + " 'whether the registrant is a shell company (as defined in Rule 12b-2 of the Act).☐þ\\n' +\n", + " \"As of November 30, 2022, the aggregate market values of the Registrant's Common Stock held by non-affiliates were:\\n\" +\n", + " 'Class A$7,831,564,572 \\n' +\n", + " 'Class B136,467,702,472 \\n' +\n", + " '$144,299,267,044 ',\n", + " metadata: {\n", + " source: '../../../../data/nke-10k-2023.pdf',\n", + " pdf: {\n", + " version: '1.10.100',\n", + " info: [Object],\n", + " metadata: null,\n", + " totalPages: 107\n", + " },\n", + " loc: { pageNumber: 1 }\n", + " },\n", + " id: undefined\n", + "}\n" + ] + } + ], + "source": [ + "const docs = await loader.load()\n", + "docs[0]" + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "{\n", + " source: '../../../../data/nke-10k-2023.pdf',\n", + " pdf: {\n", + " version: '1.10.100',\n", + " info: {\n", + " PDFFormatVersion: '1.4',\n", + " IsAcroFormPresent: false,\n", + " IsXFAPresent: false,\n", + " Title: '0000320187-23-000039',\n", + " Author: 'EDGAR Online, a division of Donnelley Financial Solutions',\n", + " Subject: 'Form 10-K filed on 2023-07-20 for the period ending 2023-05-31',\n", + " Keywords: 
'0000320187-23-000039; ; 10-K',\n", + " Creator: 'EDGAR Filing HTML Converter',\n", + " Producer: 'EDGRpdf Service w/ EO.Pdf 22.0.40.0',\n", + " CreationDate: \"D:20230720162200-04'00'\",\n", + " ModDate: \"D:20230720162208-04'00'\"\n", + " },\n", + " metadata: null,\n", + " totalPages: 107\n", + " },\n", + " loc: { pageNumber: 1 }\n", + "}\n" + ] + } + ], + "source": [ + "console.log(docs[0].metadata)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Usage, one document per file" + ] + }, + { + "cell_type": "code", + "execution_count": 8, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Table of Contents\n", + "UNITED STATES\n", + "SECURITIES AND EXCHANGE COMMISSION\n", + "Washington, D.C. 20549\n", + "FORM 10-K\n", + "\n" + ] + } + ], + "source": [ + "import { PDFLoader } from \"@langchain/community/document_loaders/fs/pdf\";\n", + "\n", + "const singleDocPerFileLoader = new PDFLoader(nike10kPdfPath, {\n", + " splitPages: false,\n", + "});\n", + "\n", + "const singleDoc = await singleDocPerFileLoader.load();\n", + "console.log(singleDoc[0].pageContent.slice(0, 100))" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Usage, custom `pdfjs` build\n", + "\n", + "By default we use the `pdfjs` build bundled with `pdf-parse`, which is compatible with most environments, including Node.js and modern browsers. 
If you want to use a more recent version of `pdfjs-dist` or if you want to use a custom build of `pdfjs-dist`, you can do so by providing a custom `pdfjs` function that returns a promise that resolves to the `PDFJS` object.\n", + "\n", + "In the following example we use the \"legacy\" (see [pdfjs docs](https://github.com/mozilla/pdf.js/wiki/Frequently-Asked-Questions#which-browsersenvironments-are-supported)) build of `pdfjs-dist`, which includes several polyfills not included in the default build.\n", + "\n", + "```{=mdx}\n", + "\n", + " pdfjs-dist\n", + "\n", + "\n", + "```\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "import { PDFLoader } from \"@langchain/community/document_loaders/fs/pdf\";\n", + "\n", + "const customBuildLoader = new PDFLoader(nike10kPdfPath, {\n", + " // you may need to add `.then(m => m.default)` to the end of the import\n", + " // @lc-ts-ignore\n", + " pdfjs: () => import(\"pdfjs-dist/legacy/build/pdf.js\"),\n", + "});" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Eliminating extra spaces\n", + "\n", + "PDFs come in many varieties, which makes reading them a challenge. The loader parses individual text elements and joins them together with a space by default, but\n", + "if you are seeing excessive spaces, this may not be the desired behavior. 
In that case, you can override the separator with an empty string like this:\n" + ] + }, + { + "cell_type": "code", + "execution_count": 12, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "(Mark One)\n", + "☑ ANNUAL REPORT PURSUANT TO SECTION 13 OR 15(D) OF THE SECURITIES EXCHANGE ACT OF 1934\n", + "FOR THE FISCAL YEAR ENDED MAY 31, 2023\n", + "OR\n", + "☐ TRANSITI\n" + ] + } + ], + "source": [ + "import { PDFLoader } from \"@langchain/community/document_loaders/fs/pdf\";\n", + "\n", + "const noExtraSpacesLoader = new PDFLoader(nike10kPdfPath, {\n", + " parsedItemSeparator: \"\",\n", + "});\n", + "\n", + "const noExtraSpacesDocs = await noExtraSpacesLoader.load();\n", + "console.log(noExtraSpacesDocs[0].pageContent.slice(100, 250))" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Loading directories" + ] + }, + { + "cell_type": "code", + "execution_count": 17, + "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "Unknown file type: Star_Wars_The_Clone_Wars_S06E07_Crisis_at_the_Heart.srt\n", + "Unknown file type: example.txt\n", + "Unknown file type: notion.md\n", + "Unknown file type: bad_frontmatter.md\n", + "Unknown file type: frontmatter.md\n", + "Unknown file type: no_frontmatter.md\n", + "Unknown file type: no_metadata.md\n", + "Unknown file type: tags_and_frontmatter.md\n", + "Unknown file type: test.mp3\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Document {\n", + " pageContent: 'Bitcoin: A Peer-to-Peer Electronic Cash System\\n' +\n", + " 'Satoshi Nakamoto\\n' +\n", + " 'satoshin@gmx.com\\n' +\n", + " 'www.bitcoin.org\\n' +\n", + " 'Abstract. A purely peer-to-peer version of electronic cash would allow online \\n' +\n", + " 'payments to be sent directly from one party to another without going through a \\n' +\n", + " 'financial institution. 
Digital signatures provide part of the solution, but the main \\n' +\n", + " 'benefits are lost if a trusted third party is still required to prevent double-spending. \\n' +\n", + " 'We propose a solution to the double-spending problem using a peer-to-peer network. \\n' +\n", + " 'The network timestamps transactions by hashing them into an ongoing chain of \\n' +\n", + " 'hash-based proof-of-work, forming a record that cannot be changed without redoing \\n' +\n", + " 'the proof-of-work. The longest chain not only serves as proof of the sequence of \\n' +\n", + " 'events witnessed, but proof that it came from the largest pool of CPU power. As \\n' +\n", + " 'long as a majority of CPU power is controlled by nodes that are not cooperating to \\n' +\n", + " \"attack the network, they'll generate the longest chain and outpace attackers. The \\n\" +\n", + " 'network itself requires minimal structure. Messages are broadcast on a best effort \\n' +\n", + " 'basis, and nodes can leave and rejoin the network at will, accepting the longest \\n' +\n", + " 'proof-of-work chain as proof of what happened while they were gone.\\n' +\n", + " '1.Introduction\\n' +\n", + " 'Commerce on the Internet has come to rely almost exclusively on financial institutions serving as \\n' +\n", + " 'trusted third parties to process electronic payments. While the system works well enough for \\n' +\n", + " 'most transactions, it still suffers from the inherent weaknesses of the trust based model. \\n' +\n", + " 'Completely non-reversible transactions are not really possible, since financial institutions cannot \\n' +\n", + " 'avoid mediating disputes. The cost of mediation increases transaction costs, limiting the \\n' +\n", + " 'minimum practical transaction size and cutting off the possibility for small casual transactions, \\n' +\n", + " 'and there is a broader cost in the loss of ability to make non-reversible payments for non-\\n' +\n", + " 'reversible services. 
With the possibility of reversal, the need for trust spreads. Merchants must \\n' +\n", + " 'be wary of their customers, hassling them for more information than they would otherwise need. \\n' +\n", + " 'A certain percentage of fraud is accepted as unavoidable. These costs and payment uncertainties \\n' +\n", + " 'can be avoided in person by using physical currency, but no mechanism exists to make payments \\n' +\n", + " 'over a communications channel without a trusted party.\\n' +\n", + " 'What is needed is an electronic payment system based on cryptographic proof instead of trust, \\n' +\n", + " 'allowing any two willing parties to transact directly with each other without the need for a trusted \\n' +\n", + " 'third party. Transactions that are computationally impractical to reverse would protect sellers \\n' +\n", + " 'from fraud, and routine escrow mechanisms could easily be implemented to protect buyers. In \\n' +\n", + " 'this paper, we propose a solution to the double-spending problem using a peer-to-peer distributed \\n' +\n", + " 'timestamp server to generate computational proof of the chronological order of transactions. The \\n' +\n", + " 'system is secure as long as honest nodes collectively control more CPU power than any \\n' +\n", + " 'cooperating group of attacker nodes.\\n' +\n", + " '1',\n", + " metadata: {\n", + " source: '/Users/bracesproul/code/lang-chain-ai/langchainjs/examples/src/document_loaders/example_data/bitcoin.pdf',\n", + " pdf: {\n", + " version: '1.10.100',\n", + " info: [Object],\n", + " metadata: null,\n", + " totalPages: 9\n", + " },\n", + " loc: { pageNumber: 1 }\n", + " },\n", + " id: undefined\n", + "}\n", + "Document {\n", + " pageContent: 'Bitcoin: A Peer-to-Peer Electronic Cash System\\n' +\n", + " 'Satoshi Nakamoto\\n' +\n", + " 'satoshin@gmx.com\\n' +\n", + " 'www.bitcoin.org\\n' +\n", + " 'Abstract. 
A purely peer-to-peer version of electronic cash would allow online \\n' +\n", + " 'payments to be sent directly from one party to another without going through a \\n' +\n", + " 'financial institution. Digital signatures provide part of the solution, but the main \\n' +\n", + " 'benefits are lost if a trusted third party is still required to prevent double-spending. \\n' +\n", + " 'We propose a solution to the double-spending problem using a peer-to-peer network. \\n' +\n", + " 'The network timestamps transactions by hashing them into an ongoing chain of \\n' +\n", + " 'hash-based proof-of-work, forming a record that cannot be changed without redoing \\n' +\n", + " 'the proof-of-work. The longest chain not only serves as proof of the sequence of \\n' +\n", + " 'events witnessed, but proof that it came from the largest pool of CPU power. As \\n' +\n", + " 'long as a majority of CPU power is controlled by nodes that are not cooperating to',\n", + " metadata: {\n", + " source: '/Users/bracesproul/code/lang-chain-ai/langchainjs/examples/src/document_loaders/example_data/bitcoin.pdf',\n", + " pdf: {\n", + " version: '1.10.100',\n", + " info: [Object],\n", + " metadata: null,\n", + " totalPages: 9\n", + " },\n", + " loc: { pageNumber: 1, lines: [Object] }\n", + " },\n", + " id: undefined\n", + "}\n" + ] + } + ], + "source": [ + "import { DirectoryLoader } from \"langchain/document_loaders/fs/directory\";\n", + "import { PDFLoader } from \"@langchain/community/document_loaders/fs/pdf\";\n", + "import { RecursiveCharacterTextSplitter } from \"@langchain/textsplitters\";\n", + "\n", + "const exampleDataPath = \"../../../../../../examples/src/document_loaders/example_data/\";\n", + "\n", + "/* Load all PDFs within the specified directory */\n", + "const directoryLoader = new DirectoryLoader(\n", + " exampleDataPath,\n", + " {\n", + " \".pdf\": (path: string) => new PDFLoader(path),\n", + " }\n", + ");\n", + "\n", + "const directoryDocs = await directoryLoader.load();\n", + 
"\n", + "console.log(directoryDocs[0]);\n", + "\n", + "/* Additional steps : Split text into chunks with any TextSplitter. You can then use it as context or save it to memory afterwards. */\n", + "const textSplitter = new RecursiveCharacterTextSplitter({\n", + " chunkSize: 1000,\n", + " chunkOverlap: 200,\n", + "});\n", + "\n", + "const splitDocs = await textSplitter.splitDocuments(directoryDocs);\n", + "console.log(splitDocs[0]);\n" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## API reference\n", + "\n", + "For detailed documentation of all PDFLoader features and configurations head to the API reference: https://api.js.langchain.com/classes/langchain_community_document_loaders_fs_pdf.PDFLoader.html" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [] + } + ], + "metadata": { + "kernelspec": { + "display_name": "TypeScript", + "language": "typescript", + "name": "tslab" + }, + "language_info": { + "codemirror_mode": { + "mode": "typescript", + "name": "javascript", + "typescript": true + }, + "file_extension": ".ts", + "mimetype": "text/typescript", + "name": "typescript", + "version": "3.7.2" + } + }, + "nbformat": 4, + "nbformat_minor": 4 +} diff --git a/docs/core_docs/docs/integrations/document_loaders/file_loaders/pdf.mdx b/docs/core_docs/docs/integrations/document_loaders/file_loaders/pdf.mdx deleted file mode 100644 index 9e92902d452a..000000000000 --- a/docs/core_docs/docs/integrations/document_loaders/file_loaders/pdf.mdx +++ /dev/null @@ -1,72 +0,0 @@ -# PDF files - -This example goes over how to load data from PDF files. By default, one document will be created for each page in the PDF file, you can change this behavior by setting the `splitPages` option to `false`. 
- -## Setup - -```bash npm2yarn -npm install pdf-parse -``` - -## Usage, one document per page - -```typescript -import { PDFLoader } from "@langchain/community/document_loaders/fs/pdf"; - -const loader = new PDFLoader("src/document_loaders/example_data/example.pdf"); - -const docs = await loader.load(); -``` - -## Usage, one document per file - -```typescript -import { PDFLoader } from "@langchain/community/document_loaders/fs/pdf"; - -const loader = new PDFLoader("src/document_loaders/example_data/example.pdf", { - splitPages: false, -}); - -const docs = await loader.load(); -``` - -## Usage, custom `pdfjs` build - -By default we use the `pdfjs` build bundled with `pdf-parse`, which is compatible with most environments, including Node.js and modern browsers. If you want to use a more recent version of `pdfjs-dist` or if you want to use a custom build of `pdfjs-dist`, you can do so by providing a custom `pdfjs` function that returns a promise that resolves to the `PDFJS` object. - -In the following example we use the "legacy" (see [pdfjs docs](https://github.com/mozilla/pdf.js/wiki/Frequently-Asked-Questions#which-browsersenvironments-are-supported)) build of `pdfjs-dist`, which includes several polyfills not included in the default build. - -```bash npm2yarn -npm install pdfjs-dist -``` - -```typescript -import { PDFLoader } from "@langchain/community/document_loaders/fs/pdf"; - -const loader = new PDFLoader("src/document_loaders/example_data/example.pdf", { - // you may need to add `.then(m => m.default)` to the end of the import - pdfjs: () => import("pdfjs-dist/legacy/build/pdf.js"), -}); -``` - -## Eliminating extra spaces - -PDFs come in many varieties, which makes reading them a challenge. The loader parses individual text elements and joins them together with a space by default, but -if you are seeing excessive spaces, this may not be the desired behavior. 
In that case, you can override the separator with an empty string like this: - -```typescript -import { PDFLoader } from "@langchain/community/document_loaders/fs/pdf"; - -const loader = new PDFLoader("src/document_loaders/example_data/example.pdf", { - parsedItemSeparator: "", -}); - -const docs = await loader.load(); -``` - -## Loading directories - -import CodeBlock from "@theme/CodeBlock"; -import MemoryExample from "@examples/document_loaders/pdf_directory.ts"; - -{MemoryExample} diff --git a/docs/core_docs/docs/integrations/document_loaders/file_loaders/text.ipynb b/docs/core_docs/docs/integrations/document_loaders/file_loaders/text.ipynb new file mode 100644 index 000000000000..bf6c6de8d823 --- /dev/null +++ b/docs/core_docs/docs/integrations/document_loaders/file_loaders/text.ipynb @@ -0,0 +1,164 @@ +{ + "cells": [ + { + "cell_type": "raw", + "metadata": {}, + "source": [ + "---\n", + "sidebar_label: TextLoader\n", + "sidebar_class_name: node-only\n", + "---" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# TextLoader\n", + "\n", + "```{=mdx}\n", + "\n", + ":::tip Compatibility\n", + "\n", + "Only available on Node.js.\n", + "\n", + ":::\n", + "\n", + "```\n", + "\n", + "This notebook provides a quick overview for getting started with `TextLoader` [document loaders](/docs/concepts/#document-loaders). 
For detailed documentation of all `TextLoader` features and configurations head to the [API reference](https://api.js.langchain.com/classes/langchain_document_loaders_fs_text.TextLoader.html).\n", + "\n", + "## Overview\n", + "### Integration details\n", + "\n", + "| Class | Package | Compatibility | Local | PY support | \n", + "| :--- | :--- | :---: | :---: | :---: |\n", + "| [TextLoader](https://api.js.langchain.com/classes/langchain_document_loaders_fs_text.TextLoader.html) | [langchain](https://api.js.langchain.com/modules/langchain_document_loaders_fs_text.html) | Node-only | ✅ | ❌ |\n", + "\n", + "## Setup\n", + "\n", + "To access `TextLoader` document loader you'll need to install the `langchain` package.\n", + "\n", + "### Installation\n", + "\n", + "The LangChain TextLoader integration lives in the `langchain` package:\n", + "\n", + "```{=mdx}\n", + "import IntegrationInstallTooltip from \"@mdx_components/integration_install_tooltip.mdx\";\n", + "import Npm2Yarn from \"@theme/Npm2Yarn\";\n", + "\n", + "\n", + "\n", + "\n", + " langchain\n", + "\n", + "\n", + "```" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Instantiation\n", + "\n", + "Now we can instantiate our model object and load documents:" + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "metadata": {}, + "outputs": [], + "source": [ + "import { TextLoader } from \"langchain/document_loaders/fs/text\"\n", + "\n", + "const loader = new TextLoader(\"../../../../../../examples/src/document_loaders/example_data/example.txt\")" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Load" + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Document {\n", + " pageContent: 'Foo\\nBar\\nBaz\\n\\n',\n", + " metadata: {\n", + " source: '../../../../../../examples/src/document_loaders/example_data/example.txt'\n", + " },\n", + " id: 
undefined\n", + "}\n" + ] + } + ], + "source": [ + "const docs = await loader.load()\n", + "docs[0]" + ] + }, + { + "cell_type": "code", + "execution_count": 6, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "{\n", + " source: '../../../../../../examples/src/document_loaders/example_data/example.txt'\n", + "}\n" + ] + } + ], + "source": [ + "console.log(docs[0].metadata)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## API reference\n", + "\n", + "For detailed documentation of all TextLoader features and configurations head to the API reference: https://api.js.langchain.com/classes/langchain_document_loaders_fs_text.TextLoader.html" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [] + } + ], + "metadata": { + "kernelspec": { + "display_name": "TypeScript", + "language": "typescript", + "name": "tslab" + }, + "language_info": { + "codemirror_mode": { + "mode": "typescript", + "name": "javascript", + "typescript": true + }, + "file_extension": ".ts", + "mimetype": "text/typescript", + "name": "typescript", + "version": "3.7.2" + } + }, + "nbformat": 4, + "nbformat_minor": 4 +} diff --git a/docs/core_docs/docs/integrations/document_loaders/file_loaders/text.mdx b/docs/core_docs/docs/integrations/document_loaders/file_loaders/text.mdx deleted file mode 100644 index d20d7c1942d2..000000000000 --- a/docs/core_docs/docs/integrations/document_loaders/file_loaders/text.mdx +++ /dev/null @@ -1,15 +0,0 @@ ---- -hide_table_of_contents: true ---- - -# Text files - -This example goes over how to load data from text files. 
- -```typescript -import { TextLoader } from "langchain/document_loaders/fs/text"; - -const loader = new TextLoader("src/document_loaders/example_data/example.txt"); - -const docs = await loader.load(); -``` diff --git a/docs/core_docs/docs/integrations/document_loaders/file_loaders/unstructured.ipynb b/docs/core_docs/docs/integrations/document_loaders/file_loaders/unstructured.ipynb new file mode 100644 index 000000000000..6004fabb0f8a --- /dev/null +++ b/docs/core_docs/docs/integrations/document_loaders/file_loaders/unstructured.ipynb @@ -0,0 +1,243 @@ +{ + "cells": [ + { + "cell_type": "raw", + "metadata": { + "vscode": { + "languageId": "raw" + } + }, + "source": [ + "---\n", + "sidebar_label: Unstructured\n", + "sidebar_class_name: node-only\n", + "---" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# UnstructuredLoader\n", + "\n", + "```{=mdx}\n", + "\n", + ":::tip Compatibility\n", + "\n", + "Only available on Node.js.\n", + "\n", + ":::\n", + "\n", + "```\n", + "\n", + "This notebook provides a quick overview for getting started with `UnstructuredLoader` [document loaders](/docs/concepts/#document-loaders). 
For detailed documentation of all `UnstructuredLoader` features and configurations head to the [API reference](https://api.js.langchain.com/classes/langchain_community_document_loaders_fs_unstructured.UnstructuredLoader.html).\n", + "\n", + "## Overview\n", + "### Integration details\n", + "\n", + "| Class | Package | Compatibility | Local | [PY support](https://python.langchain.com/docs/integrations/document_loaders/unstructured_file) | \n", + "| :--- | :--- | :---: | :---: | :---: |\n", + "| [UnstructuredLoader](https://api.js.langchain.com/classes/langchain_community_document_loaders_fs_unstructured.UnstructuredLoader.html) | [@langchain/community](https://api.js.langchain.com/modules/langchain_community_document_loaders_fs_unstructured.html) | Node-only | ✅ | ✅ |\n", + "\n", + "## Setup\n", + "\n", + "To access `UnstructuredLoader` document loader you'll need to install the `@langchain/community` integration package, and create an Unstructured account and get an API key.\n", + "\n", + "### Local\n", + "\n", + "You can run Unstructured locally in your computer using Docker. To do so, you need to have Docker installed. You can find the instructions to install Docker [here](https://docs.docker.com/get-docker/).\n", + "\n", + "```bash\n", + "docker run -p 8000:8000 -d --rm --name unstructured-api downloads.unstructured.io/unstructured-io/unstructured-api:latest --port 8000 --host 0.0.0.0\n", + "```\n", + "\n", + "### Credentials\n", + "\n", + "Head to [unstructured.io](https://unstructured.io/api-key-hosted) to sign up to Unstructured and generate an API key. 
Once you've done this set the `UNSTRUCTURED_API_KEY` environment variable:\n", + "\n", + "```bash\n", + "export UNSTRUCTURED_API_KEY=\"your-api-key\"\n", + "```\n", + "\n", + "### Installation\n", + "\n", + "The LangChain UnstructuredLoader integration lives in the `@langchain/community` package:\n", + "\n", + "```{=mdx}\n", + "import IntegrationInstallTooltip from \"@mdx_components/integration_install_tooltip.mdx\";\n", + "import Npm2Yarn from \"@theme/Npm2Yarn\";\n", + "\n", + "\n", + "\n", + "\n", + " @langchain/community\n", + "\n", + "\n", + "```" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Instantiation\n", + "\n", + "Now we can instantiate our model object and load documents:" + ] + }, + { + "cell_type": "code", + "execution_count": 1, + "metadata": {}, + "outputs": [], + "source": [ + "import { UnstructuredLoader } from \"@langchain/community/document_loaders/fs/unstructured\"\n", + "\n", + "const loader = new UnstructuredLoader(\"../../../../../../examples/src/document_loaders/example_data/notion.md\")" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Load" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Document {\n", + " pageContent: '# Testing the notion markdownloader',\n", + " metadata: {\n", + " filename: 'notion.md',\n", + " languages: [ 'eng' ],\n", + " filetype: 'text/plain',\n", + " category: 'NarrativeText'\n", + " },\n", + " id: undefined\n", + "}\n" + ] + } + ], + "source": [ + "const docs = await loader.load()\n", + "docs[0]" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "{\n", + " filename: 'notion.md',\n", + " languages: [ 'eng' ],\n", + " filetype: 'text/plain',\n", + " category: 'NarrativeText'\n", + "}\n" + ] + } + ], + "source": [ + 
"console.log(docs[0].metadata)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Directories\n", + "\n", + "You can also load all of the files in the directory using [`UnstructuredDirectoryLoader`](https://v02.api.js.langchain.com/classes/langchain_document_loaders_fs_unstructured.UnstructuredDirectoryLoader.html), which inherits from [`DirectoryLoader`](/docs/integrations/document_loaders/file_loaders/directory):\n" + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "Unknown file type: Star_Wars_The_Clone_Wars_S06E07_Crisis_at_the_Heart.srt\n", + "Unknown file type: test.mp3\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "directoryDocs.length: 247\n", + "Document {\n", + " pageContent: 'Bitcoin: A Peer-to-Peer Electronic Cash System',\n", + " metadata: {\n", + " filetype: 'application/pdf',\n", + " languages: [ 'eng' ],\n", + " page_number: 1,\n", + " filename: 'bitcoin.pdf',\n", + " category: 'Title'\n", + " },\n", + " id: undefined\n", + "}\n" + ] + } + ], + "source": [ + "import { UnstructuredDirectoryLoader } from \"@langchain/community/document_loaders/fs/unstructured\";\n", + "\n", + "const directoryLoader = new UnstructuredDirectoryLoader(\n", + " \"../../../../../../examples/src/document_loaders/example_data/\",\n", + " {}\n", + ");\n", + "const directoryDocs = await directoryLoader.load();\n", + "console.log(\"directoryDocs.length: \", directoryDocs.length);\n", + "console.log(directoryDocs[0])\n" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## API reference\n", + "\n", + "For detailed documentation of all UnstructuredLoader features and configurations head to the API reference: https://api.js.langchain.com/classes/langchain_community_document_loaders_fs_unstructured.UnstructuredLoader.html" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [] 
+ } + ], + "metadata": { + "kernelspec": { + "display_name": "TypeScript", + "language": "typescript", + "name": "tslab" + }, + "language_info": { + "codemirror_mode": { + "mode": "typescript", + "name": "javascript", + "typescript": true + }, + "file_extension": ".ts", + "mimetype": "text/typescript", + "name": "typescript", + "version": "3.7.2" + } + }, + "nbformat": 4, + "nbformat_minor": 4 +} diff --git a/docs/core_docs/docs/integrations/document_loaders/file_loaders/unstructured.mdx b/docs/core_docs/docs/integrations/document_loaders/file_loaders/unstructured.mdx deleted file mode 100644 index 7c82029f16de..000000000000 --- a/docs/core_docs/docs/integrations/document_loaders/file_loaders/unstructured.mdx +++ /dev/null @@ -1,32 +0,0 @@ ---- -hide_table_of_contents: true ---- - -# Unstructured - -This example covers how to use [Unstructured.io](https://unstructured.io/) to load files of many types. Unstructured currently supports loading of text files, powerpoints, html, pdfs, images, and more. - -## Setup - -You can run Unstructured locally in your computer using Docker. To do so, you need to have Docker installed. You can find the instructions to install Docker [here](https://docs.docker.com/get-docker/). - -```bash -docker run -p 8000:8000 -d --rm --name unstructured-api downloads.unstructured.io/unstructured-io/unstructured-api:latest --port 8000 --host 0.0.0.0 -``` - -## Usage - -Once Unstructured is running, you can use it to load files from your computer. You can use the following code to load a file from your computer. 
- -import CodeBlock from "@theme/CodeBlock"; -import Example from "@examples/document_loaders/unstructured.ts"; - -{Example} - -## Directories - -You can also load all of the files in the directory using [`UnstructuredDirectoryLoader`](https://v02.api.js.langchain.com/classes/langchain_document_loaders_fs_unstructured.UnstructuredDirectoryLoader.html), which inherits from [`DirectoryLoader`](/docs/integrations/document_loaders/file_loaders/directory): - -import DirectoryExample from "@examples/document_loaders/unstructured_directory.ts"; - -{DirectoryExample} diff --git a/docs/core_docs/docs/integrations/document_loaders/web_loaders/firecrawl.ipynb b/docs/core_docs/docs/integrations/document_loaders/web_loaders/firecrawl.ipynb new file mode 100644 index 000000000000..ab81ec8b86c6 --- /dev/null +++ b/docs/core_docs/docs/integrations/document_loaders/web_loaders/firecrawl.ipynb @@ -0,0 +1,221 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "---\n", + "sidebar_label: FireCrawl\n", + "\n", + "---" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# FireCrawlLoader\n", + "\n", + "This notebook provides a quick overview for getting started with [FireCrawlLoader](/docs/integrations/document_loaders/). 
For detailed documentation of all FireCrawlLoader features and configurations head to the [API reference](https://api.js.langchain.com/classes/langchain_community_document_loaders_web_firecrawl.FireCrawlLoader.html).\n", + "\n", + "## Overview\n", + "### Integration details\n", + "\n", + "| Class | Package | Local | Serializable | [PY support](https://python.langchain.com/docs/integrations/document_loaders/firecrawl)|\n", + "| :--- | :--- | :---: | :---: | :---: |\n", + "| [FireCrawlLoader](https://api.js.langchain.com/classes/langchain_community_document_loaders_web_firecrawl.FireCrawlLoader.html) | [@langchain/community](https://api.js.langchain.com/modules/langchain_community_document_loaders_web_firecrawl.html) | 🟠 (see details below) | beta | ✅ | \n", + "### Loader features\n", + "| Source | Web Loader | Node Envs Only\n", + "| :---: | :---: | :---: | \n", + "| FireCrawlLoader | ✅ | ❌ | \n", + "\n", + "[FireCrawl](https://firecrawl.dev) crawls and converts any website into LLM-ready data. It crawls all accessible sub-pages and gives you clean markdown and metadata for each. No sitemap required.\n", + "\n", + "FireCrawl handles complex tasks such as reverse proxies, caching, rate limits, and content blocked by JavaScript. Built by the [mendable.ai](https://mendable.ai) team.\n", + "\n", + "This guide shows how to scrape and crawl entire websites and load them using the `FireCrawlLoader` in LangChain.\n", + "\n", + "## Setup\n", + "\n", + "To access the `FireCrawlLoader` document loader you'll need to install the `@langchain/community` integration, and the `@mendable/firecrawl-js` package. Then create a **[FireCrawl](https://firecrawl.dev)** account and get an API key.\n", + "\n", + "### Credentials\n", + "\n", + "Sign up and get your free [FireCrawl API key](https://firecrawl.dev) to start. 
FireCrawl offers 300 free credits to get you started, and it's [open-source](https://github.com/mendableai/firecrawl) in case you want to self-host.\n", + "\n", + "Once you've done this set the `FIRECRAWL_API_KEY` environment variable:\n", + "\n", + "```bash\n", + "export FIRECRAWL_API_KEY=\"your-api-key\"\n", + "```\n", + "\n", + "If you want to get automated tracing of your model calls you can also set your [LangSmith](https://docs.smith.langchain.com/) API key by uncommenting below:\n", + "\n", + "```bash\n", + "# export LANGCHAIN_TRACING_V2=\"true\"\n", + "# export LANGCHAIN_API_KEY=\"your-api-key\"\n", + "```\n", + "\n", + "### Installation\n", + "\n", + "The LangChain FireCrawlLoader integration lives in the `@langchain/community` package:\n", + "\n", + "```{=mdx}\n", + "import IntegrationInstallTooltip from \"@mdx_components/integration_install_tooltip.mdx\";\n", + "import Npm2Yarn from \"@theme/Npm2Yarn\";\n", + "\n", + "\n", + "\n", + "\n", + " @langchain/community @mendable/firecrawl-js\n", + "\n", + "\n", + "```" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Instantiation\n", + "\n", + "Here's an example of how to use the `FireCrawlLoader` to load web search results:\n", + "\n", + "Firecrawl offers 2 modes: `scrape` and `crawl`. In `scrape` mode, Firecrawl will only scrape the page you provide. In `crawl` mode, Firecrawl will crawl the entire website.\n", + "\n", + "Now we can instantiate our model object and load documents:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "import \"@mendable/firecrawl-js\";\n", + "import { FireCrawlLoader } from \"@langchain/community/document_loaders/web/firecrawl\"\n", + "\n", + "const loader = new FireCrawlLoader({\n", + " url: \"https://firecrawl.dev\", // The URL to scrape\n", + " apiKey: \"...\", // Optional, defaults to `FIRECRAWL_API_KEY` in your env.\n", + " mode: \"scrape\", // The mode to run the crawler in. 
Can be \"scrape\" for single urls or \"crawl\" for all accessible subpages\n", + " params: {\n", + " // optional parameters based on Firecrawl API docs\n", + " // For API documentation, visit https://docs.firecrawl.dev\n", + " },\n", + "})" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Load" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "Document {\n", + " pageContent: \u001b[32m\"Introducing [Smart Crawl!](https://www.firecrawl.dev/smart-crawl)\\n\"\u001b[39m +\n", + " \u001b[32m\" Join the waitlist to turn any web\"\u001b[39m... 18721 more characters,\n", + " metadata: {\n", + " title: \u001b[32m\"Home - Firecrawl\"\u001b[39m,\n", + " description: \u001b[32m\"Firecrawl crawls and converts any website into clean markdown.\"\u001b[39m,\n", + " keywords: \u001b[32m\"Firecrawl,Markdown,Data,Mendable,Langchain\"\u001b[39m,\n", + " robots: \u001b[32m\"follow, index\"\u001b[39m,\n", + " ogTitle: \u001b[32m\"Firecrawl\"\u001b[39m,\n", + " ogDescription: \u001b[32m\"Turn any website into LLM-ready data.\"\u001b[39m,\n", + " ogUrl: \u001b[32m\"https://www.firecrawl.dev/\"\u001b[39m,\n", + " ogImage: \u001b[32m\"https://www.firecrawl.dev/og.png?123\"\u001b[39m,\n", + " ogLocaleAlternate: [],\n", + " ogSiteName: \u001b[32m\"Firecrawl\"\u001b[39m,\n", + " sourceURL: \u001b[32m\"https://firecrawl.dev\"\u001b[39m,\n", + " pageStatusCode: \u001b[33m500\u001b[39m\n", + " },\n", + " id: \u001b[90mundefined\u001b[39m\n", + "}" + ] + }, + "execution_count": 3, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "const docs = await loader.load()\n", + "docs[0]" + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "{\n", + " title: \"Home - Firecrawl\",\n", + " description: \"Firecrawl crawls and converts any website into clean markdown.\",\n", 
+ " keywords: \"Firecrawl,Markdown,Data,Mendable,Langchain\",\n", + " robots: \"follow, index\",\n", + " ogTitle: \"Firecrawl\",\n", + " ogDescription: \"Turn any website into LLM-ready data.\",\n", + " ogUrl: \"https://www.firecrawl.dev/\",\n", + " ogImage: \"https://www.firecrawl.dev/og.png?123\",\n", + " ogLocaleAlternate: [],\n", + " ogSiteName: \"Firecrawl\",\n", + " sourceURL: \"https://firecrawl.dev\",\n", + " pageStatusCode: 500\n", + "}\n" + ] + } + ], + "source": [ + "console.log(docs[0].metadata)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Additional Parameters\n", + "\n", + "For `params` you can pass any of the params according to the [Firecrawl documentation](https://docs.firecrawl.dev)." + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## API reference\n", + "\n", + "For detailed documentation of all FireCrawlLoader features and configurations head to the API reference: https://api.js.langchain.com/classes/langchain_community_document_loaders_web_firecrawl.FireCrawlLoader.html" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Deno", + "language": "typescript", + "name": "deno" + }, + "language_info": { + "file_extension": ".ts", + "mimetype": "text/x.typescript", + "name": "typescript", + "nb_converter": "script", + "pygments_lexer": "typescript", + "version": "5.3.3" + } + }, + "nbformat": 4, + "nbformat_minor": 4 +} diff --git a/docs/core_docs/docs/integrations/document_loaders/web_loaders/firecrawl.mdx b/docs/core_docs/docs/integrations/document_loaders/web_loaders/firecrawl.mdx deleted file mode 100644 index 59fecb799db3..000000000000 --- a/docs/core_docs/docs/integrations/document_loaders/web_loaders/firecrawl.mdx +++ /dev/null @@ -1,38 +0,0 @@ ---- -hide_table_of_contents: true ---- - -# Firecrawl - -This guide shows how to use [Firecrawl](https://firecrawl.dev) with LangChain to load web data into an LLM-ready format using Firecrawl. 
- -## Overview - -[FireCrawl](https://firecrawl.dev) crawls and convert any website into LLM-ready data. It crawls all accessible subpages and give you clean markdown and metadata for each. No sitemap required. - -FireCrawl handles complex tasks such as reverse proxies, caching, rate limits, and content blocked by JavaScript. Built by the [mendable.ai](https://mendable.ai) team. - -This guide shows how to scrap and crawl entire websites and load them using the `FireCrawlLoader` in LangChain. - -## Setup - -Sign up and get your free [FireCrawl API key](https://firecrawl.dev) to start. FireCrawl offers 300 free credits to get you started, and it's [open-source](https://github.com/mendableai/firecrawl) in case you want to self-host. - -## Usage - -Here's an example of how to use the `FireCrawlLoader` to load web search results: - -Firecrawl offers 2 modes: `scrape` and `crawl`. In `scrape` mode, Firecrawl will only scrape the page you provide. In `crawl` mode, Firecrawl will crawl the entire website. - -import CodeBlock from "@theme/CodeBlock"; -import Example from "@examples/document_loaders/firecrawl.ts"; - -```bash npm2yarn -npm install @mendable/firecrawl-js -``` - -{Example} - -### Additional Parameters - -For `params` you can pass any of the params according to the [Firecrawl documentation](https://docs.firecrawl.dev). 
diff --git a/docs/core_docs/docs/integrations/document_loaders/web_loaders/pdf.ipynb b/docs/core_docs/docs/integrations/document_loaders/web_loaders/pdf.ipynb new file mode 100644 index 000000000000..812ed2961124 --- /dev/null +++ b/docs/core_docs/docs/integrations/document_loaders/web_loaders/pdf.ipynb @@ -0,0 +1,323 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "---\n", + "sidebar_label: PDF files\n", + "\n", + "---" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# WebPDFLoader\n", + "\n", + "This notebook provides a quick overview for getting started with [WebPDFLoader](/docs/integrations/document_loaders/). For detailed documentation of all WebPDFLoader features and configurations head to the [API reference](https://api.js.langchain.com/classes/langchain_community_document_loaders_web_pdf.WebPDFLoader.html).\n", + "\n", + "## Overview\n", + "### Integration details\n", + "\n", + "| Class | Package | Local | Serializable | PY support |\n", + "| :--- | :--- | :---: | :---: | :---: |\n", + "| [WebPDFLoader](https://api.js.langchain.com/classes/langchain_community_document_loaders_web_pdf.WebPDFLoader.html) | [@langchain/community](https://api.js.langchain.com/modules/langchain_community_document_loaders_web_pdf.html) | ✅ | beta | ❌ | \n", + "### Loader features\n", + "| Source | Web Loader | Node Envs Only\n", + "| :---: | :---: | :---: | \n", + "| WebPDFLoader | ✅ | ❌ | \n", + "\n", + "You can use this version of the popular PDFLoader in web environments.\n", + "By default, one document will be created for each page in the PDF file, you can change this behavior by setting the `splitPages` option to `false`.\n", + "\n", + "## Setup\n", + "\n", + "To access `WebPDFLoader` document loader you'll need to install the `@langchain/community` integration, along with the `pdf-parse` package:\n", + "\n", + "### Credentials\n", + "\n", + "If you want to get automated tracing of your model calls you can also 
set your [LangSmith](https://docs.smith.langchain.com/) API key by uncommenting below:\n", + "\n", + "```bash\n", + "# export LANGCHAIN_TRACING_V2=\"true\"\n", + "# export LANGCHAIN_API_KEY=\"your-api-key\"\n", + "```\n", + "\n", + "### Installation\n", + "\n", + "The LangChain WebPDFLoader integration lives in the `@langchain/community` package:\n", + "\n", + "```{=mdx}\n", + "import IntegrationInstallTooltip from \"@mdx_components/integration_install_tooltip.mdx\";\n", + "import Npm2Yarn from \"@theme/Npm2Yarn\";\n", + "\n", + "\n", + "\n", + "\n", + " @langchain/community pdf-parse\n", + "\n", + "\n", + "```" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Instantiation\n", + "\n", + "Now we can instantiate our model object and load documents:\n", + "\n", + "- TODO: Update model instantiation with relevant params." + ] + }, + { + "cell_type": "code", + "execution_count": 1, + "metadata": {}, + "outputs": [], + "source": [ + "import fs from \"fs/promises\";\n", + "import { WebPDFLoader } from \"@langchain/community/document_loaders/web/pdf\"\n", + "\n", + "const nike10kPDFPath = \"../../../../data/nke-10k-2023.pdf\";\n", + "\n", + "// Read the file as a buffer\n", + "const buffer = await fs.readFile(nike10kPDFPath);\n", + "\n", + "// Create a Blob from the buffer\n", + "const nike10kPDFBlob = new Blob([buffer], { type: 'application/pdf' });\n", + "\n", + "const loader = new WebPDFLoader(nike10kPDFBlob, {\n", + " // required params = ...\n", + " // optional params = ...\n", + "})" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Load" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Document {\n", + " pageContent: 'Table of Contents\\n' +\n", + " 'UNITED STATES\\n' +\n", + " 'SECURITIES AND EXCHANGE COMMISSION\\n' +\n", + " 'Washington, D.C. 
20549\\n' +\n", + " 'FORM 10-K\\n' +\n", + " '(Mark One)\\n' +\n", + " '☑ ANNUAL REPORT PURSUANT TO SECTION 13 OR 15(D) OF THE SECURITIES EXCHANGE ACT OF 1934\\n' +\n", + " 'FOR THE FISCAL YEAR ENDED MAY 31, 2023\\n' +\n", + " 'OR\\n' +\n", + " '☐ TRANSITION REPORT PURSUANT TO SECTION 13 OR 15(D) OF THE SECURITIES EXCHANGE ACT OF 1934\\n' +\n", + " 'FOR THE TRANSITION PERIOD FROM TO .\\n' +\n", + " 'Commission File No. 1-10635\\n' +\n", + " 'NIKE, Inc.\\n' +\n", + " '(Exact name of Registrant as specified in its charter)\\n' +\n", + " 'Oregon93-0584541\\n' +\n", + " '(State or other jurisdiction of incorporation)(IRS Employer Identification No.)\\n' +\n", + " 'One Bowerman Drive, Beaverton, Oregon 97005-6453\\n' +\n", + " '(Address of principal executive offices and zip code)\\n' +\n", + " '(503) 671-6453\\n' +\n", + " \"(Registrant's telephone number, including area code)\\n\" +\n", + " 'SECURITIES REGISTERED PURSUANT TO SECTION 12(B) OF THE ACT:\\n' +\n", + " 'Class B Common StockNKENew York Stock Exchange\\n' +\n", + " '(Title of each class)(Trading symbol)(Name of each exchange on which registered)\\n' +\n", + " 'SECURITIES REGISTERED PURSUANT TO SECTION 12(G) OF THE ACT:\\n' +\n", + " 'NONE\\n' +\n", + " 'Indicate by check mark:YESNO\\n' +\n", + " '•if the registrant is a well-known seasoned issuer, as defined in Rule 405 of the Securities Act.þ ̈\\n' +\n", + " '•if the registrant is not required to file reports pursuant to Section 13 or Section 15(d) of the Act. 
̈þ\\n' +\n", + " '•whether the registrant (1) has filed all reports required to be filed by Section 13 or 15(d) of the Securities Exchange Act of 1934 during the preceding\\n' +\n", + " '12 months (or for such shorter period that the registrant was required to file such reports), and (2) has been subject to such filing requirements for the\\n' +\n", + " 'past 90 days.\\n' +\n", + " 'þ ̈\\n' +\n", + " '•whether the registrant has submitted electronically every Interactive Data File required to be submitted pursuant to Rule 405 of Regulation S-T\\n' +\n", + " '(§232.405 of this chapter) during the preceding 12 months (or for such shorter period that the registrant was required to submit such files).\\n' +\n", + " 'þ ̈\\n' +\n", + " '•whether the registrant is a large accelerated filer, an accelerated filer, a non-accelerated filer, a smaller reporting company or an emerging growth company. See the definitions of “large accelerated filer,”\\n' +\n", + " '“accelerated filer,” “smaller reporting company,” and “emerging growth company” in Rule 12b-2 of the Exchange Act.\\n' +\n", + " 'Large accelerated filerþAccelerated filer☐Non-accelerated filer☐Smaller reporting company☐Emerging growth company☐\\n' +\n", + " '•if an emerging growth company, if the registrant has elected not to use the extended transition period for complying with any new or revised financial\\n' +\n", + " 'accounting standards provided pursuant to Section 13(a) of the Exchange Act.\\n' +\n", + " ' ̈\\n' +\n", + " \"•whether the registrant has filed a report on and attestation to its management's assessment of the effectiveness of its internal control over financial\\n\" +\n", + " 'reporting under Section 404(b) of the Sarbanes-Oxley Act (15 U.S.C. 
7262(b)) by the registered public accounting firm that prepared or issued its audit\\n' +\n", + " 'report.\\n' +\n", + " 'þ\\n' +\n", + " '•if securities are registered pursuant to Section 12(b) of the Act, whether the financial statements of the registrant included in the filing reflect the\\n' +\n", + " 'correction of an error to previously issued financial statements.\\n' +\n", + " ' ̈\\n' +\n", + " '•whether any of those error corrections are restatements that required a recovery analysis of incentive-based compensation received by any of the\\n' +\n", + " \"registrant's executive officers during the relevant recovery period pursuant to § 240.10D-1(b).\\n\" +\n", + " ' ̈\\n' +\n", + " '•\\n' +\n", + " 'whether the registrant is a shell company (as defined in Rule 12b-2 of the Act).☐þ\\n' +\n", + " \"As of November 30, 2022, the aggregate market values of the Registrant's Common Stock held by non-affiliates were:\\n\" +\n", + " 'Class A$7,831,564,572 \\n' +\n", + " 'Class B136,467,702,472 \\n' +\n", + " '$144,299,267,044 ',\n", + " metadata: {\n", + " pdf: {\n", + " version: '1.10.100',\n", + " info: [Object],\n", + " metadata: null,\n", + " totalPages: 107\n", + " },\n", + " loc: { pageNumber: 1 }\n", + " },\n", + " id: undefined\n", + "}\n" + ] + } + ], + "source": [ + "const docs = await loader.load()\n", + "docs[0]" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "{\n", + " pdf: {\n", + " version: '1.10.100',\n", + " info: {\n", + " PDFFormatVersion: '1.4',\n", + " IsAcroFormPresent: false,\n", + " IsXFAPresent: false,\n", + " Title: '0000320187-23-000039',\n", + " Author: 'EDGAR Online, a division of Donnelley Financial Solutions',\n", + " Subject: 'Form 10-K filed on 2023-07-20 for the period ending 2023-05-31',\n", + " Keywords: '0000320187-23-000039; ; 10-K',\n", + " Creator: 'EDGAR Filing HTML Converter',\n", + " Producer: 'EDGRpdf Service w/ 
EO.Pdf 22.0.40.0',\n", + " CreationDate: \"D:20230720162200-04'00'\",\n", + " ModDate: \"D:20230720162208-04'00'\"\n", + " },\n", + " metadata: null,\n", + " totalPages: 107\n", + " },\n", + " loc: { pageNumber: 1 }\n", + "}\n" + ] + } + ], + "source": [ + "console.log(docs[0].metadata)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Usage, custom `pdfjs` build\n", + "\n", + "By default we use the `pdfjs` build bundled with `pdf-parse`, which is compatible with most environments, including Node.js and modern browsers. If you want to use a more recent version of `pdfjs-dist` or if you want to use a custom build of `pdfjs-dist`, you can do so by providing a custom `pdfjs` function that returns a promise that resolves to the `PDFJS` object.\n", + "\n", + "In the following example we use the \"legacy\" (see [pdfjs docs](https://github.com/mozilla/pdf.js/wiki/Frequently-Asked-Questions#which-browsersenvironments-are-supported)) build of `pdfjs-dist`, which includes several polyfills not included in the default build.\n", + "\n", + "```{=mdx}\n", + "\n", + " pdfjs-dist\n", + "\n", + "\n", + "```" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "import { WebPDFLoader } from \"@langchain/community/document_loaders/web/pdf\";\n", + "\n", + "const blob = new Blob(); // e.g. from a file input\n", + "\n", + "const customBuildLoader = new WebPDFLoader(blob, {\n", + " // you may need to add `.then(m => m.default)` to the end of the import\n", + " // @lc-ts-ignore\n", + " pdfjs: () => import(\"pdfjs-dist/legacy/build/pdf.js\"),\n", + "});" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Eliminating extra spaces\n", + "\n", + "PDFs come in many varieties, which makes reading them a challenge. 
The loader parses individual text elements and joins them together with a space by default, but\n", + "if you are seeing excessive spaces, this may not be the desired behavior. In that case, you can override the separator with an empty string like this:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "import { WebPDFLoader } from \"@langchain/community/document_loaders/web/pdf\";\n", + "\n", + "// new Blob(); e.g. from a file input\n", + "const eliminatingExtraSpacesLoader = new WebPDFLoader(new Blob(), {\n", + " parsedItemSeparator: \"\",\n", + "});" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## API reference\n", + "\n", + "For detailed documentation of all WebPDFLoader features and configurations head to the API reference: https://api.js.langchain.com/classes/langchain_community_document_loaders_web_pdf.WebPDFLoader.html" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [] + } + ], + "metadata": { + "kernelspec": { + "display_name": "TypeScript", + "language": "typescript", + "name": "tslab" + }, + "language_info": { + "codemirror_mode": { + "mode": "typescript", + "name": "javascript", + "typescript": true + }, + "file_extension": ".ts", + "mimetype": "text/typescript", + "name": "typescript", + "version": "3.7.2" + } + }, + "nbformat": 4, + "nbformat_minor": 4 +} diff --git a/docs/core_docs/docs/integrations/document_loaders/web_loaders/pdf.mdx b/docs/core_docs/docs/integrations/document_loaders/web_loaders/pdf.mdx deleted file mode 100644 index 64e96247765c..000000000000 --- a/docs/core_docs/docs/integrations/document_loaders/web_loaders/pdf.mdx +++ /dev/null @@ -1,53 +0,0 @@ -# PDF files - -You can use this version of the popular PDFLoader in web environments. -By default, one document will be created for each page in the PDF file, you can change this behavior by setting the `splitPages` option to `false`. 
- -## Setup - -```bash npm2yarn -npm install pdf-parse -``` - -## Usage - -import CodeBlock from "@theme/CodeBlock"; -import Example from "@examples/document_loaders/web_pdf.ts"; - -{Example} - -## Usage, custom `pdfjs` build - -By default we use the `pdfjs` build bundled with `pdf-parse`, which is compatible with most environments, including Node.js and modern browsers. If you want to use a more recent version of `pdfjs-dist` or if you want to use a custom build of `pdfjs-dist`, you can do so by providing a custom `pdfjs` function that returns a promise that resolves to the `PDFJS` object. - -In the following example we use the "legacy" (see [pdfjs docs](https://github.com/mozilla/pdf.js/wiki/Frequently-Asked-Questions#which-browsersenvironments-are-supported)) build of `pdfjs-dist`, which includes several polyfills not included in the default build. - -```bash npm2yarn -npm install pdfjs-dist -``` - -```typescript -import { WebPDFLoader } from "@langchain/community/document_loaders/web/pdf"; - -const blob = new Blob(); // e.g. from a file input - -const loader = new WebPDFLoader(blob, { - // you may need to add `.then(m => m.default)` to the end of the import - pdfjs: () => import("pdfjs-dist/legacy/build/pdf.js"), -}); -``` - -## Eliminating extra spaces - -PDFs come in many varieties, which makes reading them a challenge. The loader parses individual text elements and joins them together with a space by default, but -if you are seeing excessive spaces, this may not be the desired behavior. In that case, you can override the separator with an empty string like this: - -```typescript -import { WebPDFLoader } from "@langchain/community/document_loaders/web/pdf"; - -const blob = new Blob(); // e.g. 
from a file input - -const loader = new WebPDFLoader(blob, { - parsedItemSeparator: "", -}); -``` diff --git a/docs/core_docs/docs/integrations/document_loaders/web_loaders/recursive_url_loader.ipynb b/docs/core_docs/docs/integrations/document_loaders/web_loaders/recursive_url_loader.ipynb new file mode 100644 index 000000000000..ec13013b245c --- /dev/null +++ b/docs/core_docs/docs/integrations/document_loaders/web_loaders/recursive_url_loader.ipynb @@ -0,0 +1,449 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "---\n", + "sidebar_label: RecursiveUrlLoader\n", + "sidebar_class_name: node-only\n", + "---" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# RecursiveUrlLoader\n", + "\n", + "```{=mdx}\n", + "\n", + ":::tip Compatibility\n", + "\n", + "Only available on Node.js.\n", + "\n", + ":::\n", + "\n", + "```\n", + "\n", + "This notebook provides a quick overview for getting started with [RecursiveUrlLoader](/docs/integrations/document_loaders/). 
For detailed documentation of all RecursiveUrlLoader features and configurations head to the [API reference](https://api.js.langchain.com/classes/langchain_community_document_loaders_web_recursive_url.RecursiveUrlLoader.html).\n", + "\n", + "## Overview\n", + "### Integration details\n", + "\n", + "| Class | Package | Local | Serializable | PY support |\n", + "| :--- | :--- | :---: | :---: | :---: |\n", + "| [RecursiveUrlLoader](https://api.js.langchain.com/classes/langchain_community_document_loaders_web_recursive_url.RecursiveUrlLoader.html) | [@langchain/community](https://api.js.langchain.com/modules/langchain_community_document_loaders_web_recursive_url.html) | ✅ | beta | ❌ | \n", + "### Loader features\n", + "| Source | Web Loader | Node Envs Only\n", + "| :---: | :---: | :---: | \n", + "| RecursiveUrlLoader | ✅ | ✅ | \n", + "\n", + "When loading content from a website, we may want to load all of the URLs on a page.\n", + "\n", + "For example, let's look at the [LangChain.js introduction](/docs/introduction) docs.\n", + "\n", + "This has many interesting child pages that we may want to load, split, and later retrieve in bulk.\n", + "\n", + "The challenge is traversing the tree of child pages and assembling a list!\n", + "\n", + "We do this using the `RecursiveUrlLoader`.\n", + "\n", + "This also gives us the flexibility to exclude some children, customize the extractor, and more.\n", + "\n", + "## Setup\n", + "\n", + "To access `RecursiveUrlLoader` document loader you'll need to install the `@langchain/community` integration, and the [`jsdom`](https://www.npmjs.com/package/jsdom) package.\n", + "\n", + "### Credentials\n", + "\n", + "If you want to get automated tracing of your model calls you can also set your [LangSmith](https://docs.smith.langchain.com/) API key by uncommenting below:\n", + "\n", + "```bash\n", + "# export LANGCHAIN_TRACING_V2=\"true\"\n", + "# export LANGCHAIN_API_KEY=\"your-api-key\"\n", + "```\n", + "\n", + "### Installation\n", +
"\n", + "The LangChain RecursiveUrlLoader integration lives in the `@langchain/community` package:\n", + "\n", + "```{=mdx}\n", + "import IntegrationInstallTooltip from \"@mdx_components/integration_install_tooltip.mdx\";\n", + "import Npm2Yarn from \"@theme/Npm2Yarn\";\n", + "\n", + "\n", + "\n", + "\n", + " @langchain/community jsdom\n", + "\n", + "\n", + "We also suggest adding a package like [`html-to-text`](https://www.npmjs.com/package/html-to-text) or\n", + "[`@mozilla/readability`](https://www.npmjs.com/package/@mozilla/readability) for extracting the raw text from the page.\n", + "\n", + "\n", + " html-to-text\n", + "\n", + "\n", + "```" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Instantiation\n", + "\n", + "Now we can instantiate our model object and load documents:" + ] + }, + { + "cell_type": "code", + "execution_count": 1, + "metadata": {}, + "outputs": [], + "source": [ + "import { RecursiveUrlLoader } from \"@langchain/community/document_loaders/web/recursive_url\"\n", + "import { compile } from \"html-to-text\";\n", + "\n", + "const compiledConvert = compile({ wordwrap: 130 }); // returns (text: string) => string;\n", + "\n", + "const loader = new RecursiveUrlLoader(\"https://langchain.com/\", {\n", + " extractor: compiledConvert,\n", + " maxDepth: 1,\n", + " excludeDirs: [\"/docs/api/\"],\n", + "})" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Load" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "{\n", + " pageContent: '\\n' +\n", + " '/\\n' +\n", + " 'Products\\n' +\n", + " '\\n' +\n", + " 'LangChain [/langchain]LangSmith [/langsmith]LangGraph [/langgraph]\\n' +\n", + " 'Methods\\n' +\n", + " '\\n' +\n", + " 'Retrieval [/retrieval]Agents [/agents]Evaluation [/evaluation]\\n' +\n", + " 'Resources\\n' +\n", + " '\\n' +\n", + " 'Blog [https://blog.langchain.dev/]Case Studies 
[/case-studies]Use Case Inspiration [/use-cases]Experts [/experts]Changelog\\n' +\n", + " '[https://changelog.langchain.com/]\\n' +\n", + " 'Docs\\n' +\n", + " '\\n' +\n", + " 'LangChain Docs [https://python.langchain.com/v0.2/docs/introduction/]LangSmith Docs [https://docs.smith.langchain.com/]\\n' +\n", + " 'Company\\n' +\n", + " '\\n' +\n", + " 'About [/about]Careers [/careers]\\n' +\n", + " 'Pricing [/pricing]\\n' +\n", + " 'Get a demo [/contact-sales]\\n' +\n", + " 'Sign up [https://smith.langchain.com/]\\n' +\n", + " '\\n' +\n", + " '\\n' +\n", + " '\\n' +\n", + " '\\n' +\n", + " 'LangChain’s suite of products supports developers along each step of the LLM application lifecycle.\\n' +\n", + " '\\n' +\n", + " '\\n' +\n", + " 'APPLICATIONS THAT CAN REASON. POWERED BY LANGCHAIN.\\n' +\n", + " '\\n' +\n", + " 'Get a demo [/contact-sales]Sign up for free [https://smith.langchain.com/]\\n' +\n", + " '\\n' +\n", + " '\\n' +\n", + " '\\n' +\n", + " 'FROM STARTUPS TO GLOBAL ENTERPRISES,\\n' +\n", + " 'AMBITIOUS BUILDERS CHOOSE\\n' +\n", + " 'LANGCHAIN PRODUCTS.\\n' +\n", + " '\\n' +\n", + " 
'[https://cdn.prod.website-files.com/65b8cd72835ceeacd4449a53/65ca3b7c22746faa78338532_logo_Ally.svg][https://cdn.prod.website-files.com/65b8cd72835ceeacd4449a53/65ca3b7c08e67bb7eefba4c2_logo_Rakuten.svg][https://cdn.prod.website-files.com/65b8cd72835ceeacd4449a53/65ca3b7c576fdde32d03c1a0_logo_Elastic.svg][https://cdn.prod.website-files.com/65b8cd72835ceeacd4449a53/65ca3b7c6d5592036dae24e5_logo_BCG.svg][https://cdn.prod.website-files.com/65b8cd72835ceeacd4449a53/667f19528c3557c2c19c3086_the-home-depot-2%201.png][https://cdn.prod.website-files.com/65b8cd72835ceeacd4449a53/65ca3b7cbcf6473519b06d84_logo_IDEO.svg][https://cdn.prod.website-files.com/65b8cd72835ceeacd4449a53/65ca3b7cb5f96dcc100ee3b7_logo_Zapier.svg][https://cdn.prod.website-files.com/65b8cd72835ceeacd4449a53/6606183e52d49bc369acc76c_mdy_logo_rgb_moodysblue.png][https://cdn.prod.website-files.com/65b8cd72835ceeacd4449a53/65ca3b7c8ad7db6ed6ec611e_logo_Adyen.svg][https://cdn.prod.website-files.com/65b8cd72835ceeacd4449a53/65ca3b7c737d50036a62768b_logo_Infor.svg][https://cdn.prod.website-files.com/65b8cd72835ceeacd4449a53/667f59d98444a5f98aabe21c_acxiom-vector-logo-2022%201.png][https://cdn.prod.website-files.com/65b8cd72835ceeacd4449a53/65ca3b7c09a158ffeaab0bd2_logo_Replit.svg][https://cdn.prod.website-files.com/65b8cd72835ceeacd4449a53/65ca3b7c9d2b23d292a0cab0_logo_Retool.svg][https://cdn.prod.website-files.com/65b8cd72835ceeacd4449a53/65ca3b7c44e67a3d0a996bf3_logo_Databricks.svg][https://cdn.prod.website-files.com/65b8cd72835ceeacd4449a53/667f5a1299d6ba453c78a849_image%20(19).png][https://cdn.prod.website-files.com/65b8cd72835ceeacd4449a53/65ca3b7c63af578816bafcc3_logo_Instacart.svg][https://cdn.prod.website-files.com/65b8cd72835ceeacd4449a53/665dc1dabc940168384d9596_podium%20logo.svg]\\n' +\n", + " '\\n' +\n", + " 'Build\\n' +\n", + " '\\n' +\n", + " 'LangChain is a framework to build with LLMs by chaining interoperable components. 
LangGraph is the framework for building\\n' +\n", + " 'controllable agentic workflows.\\n' +\n", + " '\\n' +\n", + " '\\n' +\n", + " '\\n' +\n", + " 'Run\\n' +\n", + " '\\n' +\n", + " 'Deploy your LLM applications at scale with LangGraph Cloud, our infrastructure purpose-built for agents.\\n' +\n", + " '\\n' +\n", + " '\\n' +\n", + " '\\n' +\n", + " 'Manage\\n' +\n", + " '\\n' +\n", + " \"Debug, collaborate, test, and monitor your LLM app in LangSmith - whether it's built with a LangChain framework or not. \\n\" +\n", + " '\\n' +\n", + " '\\n' +\n", + " '\\n' +\n", + " '\\n' +\n", + " 'BUILD YOUR APP WITH LANGCHAIN\\n' +\n", + " '\\n' +\n", + " 'Build context-aware, reasoning applications with LangChain’s flexible framework that leverages your company’s data and APIs.\\n' +\n", + " 'Future-proof your application by making vendor optionality part of your LLM infrastructure design.\\n' +\n", + " '\\n' +\n", + " 'Learn more about LangChain\\n' +\n", + " '\\n' +\n", + " '[/langchain]\\n' +\n", + " '\\n' +\n", + " '\\n' +\n", + " 'RUN AT SCALE WITH LANGGRAPH CLOUD\\n' +\n", + " '\\n' +\n", + " 'Deploy your LangGraph app with LangGraph Cloud for fault-tolerant scalability - including support for async background jobs,\\n' +\n", + " 'built-in persistence, and distributed task queues.\\n' +\n", + " '\\n' +\n", + " 'Learn more about LangGraph\\n' +\n", + " '\\n' +\n", + " '[/langgraph]\\n' +\n", + " '[https://cdn.prod.website-files.com/65b8cd72835ceeacd4449a53/667c6d7284e58f4743a430e6_Langgraph%20UI-home-2.webp]\\n' +\n", + " '\\n' +\n", + " '\\n' +\n", + " 'MANAGE LLM PERFORMANCE WITH LANGSMITH\\n' +\n", + " '\\n' +\n", + " 'Ship faster with LangSmith’s debug, test, deploy, and monitoring workflows. 
Don’t rely on “vibes” – add engineering rigor to your\\n' +\n", + " 'LLM-development workflow, whether you’re building with LangChain or not.\\n' +\n", + " '\\n' +\n", + " 'Learn more about LangSmith\\n' +\n", + " '\\n' +\n", + " '[/langsmith]\\n' +\n", + " '\\n' +\n", + " '\\n' +\n", + " 'HEAR FROM OUR HAPPY CUSTOMERS\\n' +\n", + " '\\n' +\n", + " 'LangChain, LangGraph, and LangSmith help teams of all sizes, across all industries - from ambitious startups to established\\n' +\n", + " 'enterprises.\\n' +\n", + " '\\n' +\n", + " '[https://cdn.prod.website-files.com/65b8cd72835ceeacd4449a53/65c5308aee06d9826765c897_Retool_logo%201.png]\\n' +\n", + " '\\n' +\n", + " '“LangSmith helped us improve the accuracy and performance of Retool’s fine-tuned models. Not only did we deliver a better product\\n' +\n", + " 'by iterating with LangSmith, but we’re shipping new AI features to our users in a fraction of the time it would have taken without\\n' +\n", + " 'it.”\\n' +\n", + " '\\n' +\n", + " '[https://cdn.prod.website-files.com/65b8cd72835ceeacd4449a53/65c5308abdd2dbbdde5a94a1_Jamie%20Cuffe.png]\\n' +\n", + " 'Jamie Cuffe\\n' +\n", + " 'Head of Self-Serve and New Products\\n' +\n", + " '[https://cdn.prod.website-files.com/65b8cd72835ceeacd4449a53/65c5308a04d37cf7d3eb1341_Rakuten_Global_Brand_Logo.png]\\n' +\n", + " '\\n' +\n", + " '“By combining the benefits of LangSmith and standing on the shoulders of a gigantic open-source community, we’re able to identify\\n' +\n", + " 'the right approaches of using LLMs in an enterprise-setting faster.”\\n' +\n", + " '\\n' +\n", + " '[https://cdn.prod.website-files.com/65b8cd72835ceeacd4449a53/65c5308a8b6137d44c621cb4_Yusuke%20Kaji.png]\\n' +\n", + " 'Yusuke Kaji\\n' +\n", + " 'General Manager of AI\\n' +\n", + " '[https://cdn.prod.website-files.com/65b8cd72835ceeacd4449a53/65c5308aea1371b447cc4af9_elastic-ar21.png]\\n' +\n", + " '\\n' +\n", + " '“Working with LangChain and LangSmith on the Elastic AI Assistant had a significant 
positive impact on the overall pace and\\n' +\n", + " 'quality of the development and shipping experience. We couldn’t have achieved  the product experience delivered to our customers\\n' +\n", + " 'without LangChain, and we couldn’t have done it at the same pace without LangSmith.”\\n' +\n", + " '\\n' +\n", + " '[https://cdn.prod.website-files.com/65b8cd72835ceeacd4449a53/65c5308a4095d5a871de7479_James%20Spiteri.png]\\n' +\n", + " 'James Spiteri\\n' +\n", + " 'Director of Security Products\\n' +\n", + " '[https://cdn.prod.website-files.com/65b8cd72835ceeacd4449a53/65c530539f4824b828357352_Logo_de_Fintual%201.png]\\n' +\n", + " '\\n' +\n", + " '“As soon as we heard about LangSmith, we moved our entire development stack onto it. We could have built evaluation, testing and\\n' +\n", + " 'monitoring tools in house, but with LangSmith it took us 10x less time to get a 1000x better tool.”\\n' +\n", + " '\\n' +\n", + " '[https://cdn.prod.website-files.com/65b8cd72835ceeacd4449a53/65c53058acbff86f4c2dcee2_jose%20pena.png]\\n' +\n", + " 'Jose Peña\\n' +\n", + " 'Senior Manager\\n' +\n", + " '\\n' +\n", + " '\\n' +\n", + " '\\n' +\n", + " '\\n' +\n", + " 'THE REFERENCE ARCHITECTURE ENTERPRISES ADOPT FOR SUCCESS.\\n' +\n", + " '\\n' +\n", + " 'LangChain’s suite of products can be used independently or stacked together for multiplicative impact – guiding you through\\n' +\n", + " 'building, running, and managing your LLM apps.\\n' +\n", + " '\\n' +\n", + " '[https://cdn.prod.website-files.com/65b8cd72835ceeacd4449a53/6695b116b0b60c78fd4ef462_15.07.24%20-Updated%20stack%20diagram%20-%20lightfor%20website-3.webp][https://cdn.prod.website-files.com/65b8cd72835ceeacd4449a53/667d392696fc0bc3e17a6d04_New%20LC%20stack%20-%20light-2.webp]\\n' +\n", + " '15M+\\n' +\n", + " 'Monthly Downloads\\n' +\n", + " '100K+\\n' +\n", + " 'Apps Powered\\n' +\n", + " '75K+\\n' +\n", + " 'GitHub Stars\\n' +\n", + " '3K+\\n' +\n", + " 'Contributors\\n' +\n", + " '\\n' +\n", + " '\\n' +\n", + " 'THE 
BIGGEST DEVELOPER COMMUNITY IN GENAI\\n' +\n", + " '\\n' +\n", + " 'Learn alongside the 1M+ developers who are pushing the industry forward.\\n' +\n", + " '\\n' +\n", + " 'Explore LangChain\\n' +\n", + " '\\n' +\n", + " '[/langchain]\\n' +\n", + " '\\n' +\n", + " '\\n' +\n", + " 'GET STARTED WITH THE LANGSMITH PLATFORM TODAY\\n' +\n", + " '\\n' +\n", + " 'Get a demo [/contact-sales]Sign up for free [https://smith.langchain.com/]\\n' +\n", + " '[https://cdn.prod.website-files.com/65b8cd72835ceeacd4449a53/65ccf12801bc39bf912a58f3_Home%20C.webp]\\n' +\n", + " '\\n' +\n", + " 'Teams building with LangChain are driving operational efficiency, increasing discovery & personalization, and delivering premium\\n' +\n", + " 'products that generate revenue.\\n' +\n", + " '\\n' +\n", + " 'Discover Use Cases\\n' +\n", + " '\\n' +\n", + " '[/use-cases]\\n' +\n", + " '\\n' +\n", + " '\\n' +\n", + " 'GET INSPIRED BY COMPANIES WHO HAVE DONE IT.\\n' +\n", + " '\\n' +\n", + " '[https://cdn.prod.website-files.com/65b8cd72835ceeacd4449a53/65bcd7ee85507bdf350399c3_Ally_Financial%201.svg]\\n' +\n", + " 'Financial Services\\n' +\n", + " '\\n' +\n", + " '[https://blog.langchain.dev/ally-financial-collaborates-with-langchain-to-deliver-critical-coding-module-to-mask-personal-identifying-information-in-a-compliant-and-safe-manner/]\\n' +\n", + " '[https://cdn.prod.website-files.com/65b8cd72835ceeacd4449a53/65bcd8b3ae4dc901daa3037a_Adyen_Corporate_Logo%201.svg]\\n' +\n", + " 'FinTech\\n' +\n", + " '\\n' +\n", + " '[https://blog.langchain.dev/llms-accelerate-adyens-support-team-through-smart-ticket-routing-and-support-agent-copilot/]\\n' +\n", + " '[https://cdn.prod.website-files.com/65b8cd72835ceeacd4449a53/65c534b3fa387379c0f4ebff_elastic-ar21%20(1).png]\\n' +\n", + " 'Technology\\n' +\n", + " '\\n' +\n", + " '[https://blog.langchain.dev/langchain-partners-with-elastic-to-launch-the-elastic-ai-assistant/]\\n' +\n", + " '\\n' +\n", + " '\\n' +\n", + " 'LANGSMITH IS THE ENTERPRISE DEVOPS 
PLATFORM BUILT FOR LLMS.\\n' +\n", + " '\\n' +\n", + " 'Explore LangSmith\\n' +\n", + " '\\n' +\n", + " '[/langsmith]\\n' +\n", + " 'Gain visibility to make trade offs between cost, latency, and quality.\\n' +\n", + " 'Increase developer productivity.\\n' +\n", + " 'Eliminate manual, error-prone testing.\\n' +\n", + " 'Reduce hallucinations and improve reliability.\\n' +\n", + " 'Enterprise deployment options to keep data secure.\\n' +\n", + " '\\n' +\n", + " '\\n' +\n", + " 'READY TO START SHIPPING 
RELIABLE GENAI APPS FASTER?\\n' +\n", + " '\\n' +\n", + " 'Get started with LangChain, LangGraph, and LangSmith to enhance your LLM app development, from prototype to production.\\n' +\n", + " '\\n' +\n", + " 'Get a demo [/contact-sales]Sign up for free [https://smith.langchain.com/]\\n' +\n", + " 'Products\\n' +\n", + " 'LangChain [/langchain]LangSmith [/langsmith]LangGraph [/langgraph]Agents [/agents]Evaluation [/evaluation]Retrieval [/retrieval]\\n' +\n", + " 'Resources\\n' +\n", + " 'Python Docs [https://python.langchain.com/]JS/TS Docs [https://js.langchain.com/docs/get_started/introduction/]GitHub\\n' +\n", + " '[https://github.com/langchain-ai]Integrations [https://python.langchain.com/v0.2/docs/integrations/platforms/]Templates\\n' +\n", + " '[https://templates.langchain.com/]Changelog [https://changelog.langchain.com/]LangSmith Trust Portal\\n' +\n", + " '[https://trust.langchain.com/]\\n' +\n", + " 'Company\\n' +\n", + " 'About [/about]Blog [https://blog.langchain.dev/]Twitter [https://twitter.com/LangChainAI]LinkedIn\\n' +\n", + " '[https://www.linkedin.com/company/langchain/]YouTube [https://www.youtube.com/@LangChain]Community [/join-community]Marketing\\n' +\n", + " 'Assets [https://drive.google.com/drive/folders/17xybjzmVBdsQA-VxouuGLxF6bDsHDe80?usp=sharing]\\n' +\n", + " 'Sign up for our newsletter to stay up to date\\n' +\n", + " 'Thank you! Your submission has been received!\\n' +\n", + " 'Oops! Something went wrong while submitting the form.\\n' +\n", + " '[https://cdn.prod.website-files.com/65b8cd72835ceeacd4449a53/65c6a38f9c53ec71f5fc73de_langchain-word.svg]\\n' +\n", + " 'All systems operational\\n' +\n", + " '[https://status.smith.langchain.com/]Privacy Policy [/'... 
111 more characters,\n", + " metadata: {\n", + " source: 'https://langchain.com/',\n", + " title: 'LangChain',\n", + " description: 'LangChain’s suite of products supports developers along each step of their development journey.',\n", + " language: 'en'\n", + " }\n", + "}\n" + ] + } + ], + "source": [ + "const docs = await loader.load()\n", + "docs[0]" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "{\n", + " source: 'https://langchain.com/',\n", + " title: 'LangChain',\n", + " description: 'LangChain’s suite of products supports developers along each step of their development journey.',\n", + " language: 'en'\n", + "}\n" + ] + } + ], + "source": [ + "console.log(docs[0].metadata)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Options\n", + "\n", + "```typescript\n", + "interface Options {\n", + " excludeDirs?: string[]; // webpage directories to exclude.\n", + " extractor?: (text: string) => string; // a function to extract the text of the document from the webpage, by default it returns the page as it is. It is recommended to use tools like html-to-text to extract the text. By default, it just returns the page as it is.\n", + " maxDepth?: number; // the maximum depth to crawl. By default, it is set to 2. If you need to crawl the whole website, set it to a number that is large enough would simply do the job.\n", + " timeout?: number; // the timeout for each request, in the unit of seconds. By default, it is set to 10000 (10 seconds).\n", + " preventOutside?: boolean; // whether to prevent crawling outside the root url. 
By default, it is set to true.\n", + " callerOptions?: AsyncCallerConstructorParams; // the options to call the AsyncCaller for example setting max concurrency (default is 64)\n", + "}\n", + "```\n", + "\n", + "However, since it's hard to perform a perfect filter, you may still see some irrelevant results in the results. You can perform a filter on the returned documents by yourself, if it's needed. Most of the time, the returned results are good enough." + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## API reference\n", + "\n", + "For detailed documentation of all RecursiveUrlLoader features and configurations head to the API reference: https://api.js.langchain.com/classes/langchain_community_document_loaders_web_recursive_url.RecursiveUrlLoader.html" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [] + } + ], + "metadata": { + "kernelspec": { + "display_name": "TypeScript", + "language": "typescript", + "name": "tslab" + }, + "language_info": { + "codemirror_mode": { + "mode": "typescript", + "name": "javascript", + "typescript": true + }, + "file_extension": ".ts", + "mimetype": "text/typescript", + "name": "typescript", + "version": "3.7.2" + } + }, + "nbformat": 4, + "nbformat_minor": 4 +} diff --git a/docs/core_docs/docs/integrations/document_loaders/web_loaders/recursive_url_loader.mdx b/docs/core_docs/docs/integrations/document_loaders/web_loaders/recursive_url_loader.mdx deleted file mode 100644 index ddcb358c3056..000000000000 --- a/docs/core_docs/docs/integrations/document_loaders/web_loaders/recursive_url_loader.mdx +++ /dev/null @@ -1,67 +0,0 @@ ---- -sidebar_class_name: node-only -hide_table_of_contents: true ---- - -# Recursive URL Loader - -When loading content from a website, we may want to process load all URLs on a page. - -For example, let's look at the [LangChain.js introduction](/docs/introduction) docs. 
- -This has many interesting child pages that we may want to load, split, and later retrieve in bulk. - -The challenge is traversing the tree of child pages and assembling a list! - -We do this using the RecursiveUrlLoader. - -This also gives us the flexibility to exclude some children, customize the extractor, and more. - -## Setup - -To get started, you'll need to install the [`jsdom`](https://www.npmjs.com/package/jsdom) package: - -```bash npm2yarn -npm i jsdom -``` - -We also suggest adding a package like [`html-to-text`](https://www.npmjs.com/package/html-to-text) or -[`@mozilla/readability`](https://www.npmjs.com/package/@mozilla/readability) for extracting the raw text from the page. - -```bash npm2yarn -npm i html-to-text -``` - -## Usage - -```typescript -import { compile } from "html-to-text"; -import { RecursiveUrlLoader } from "@langchain/community/document_loaders/web/recursive_url"; - -const url = "/docs/introduction"; - -const compiledConvert = compile({ wordwrap: 130 }); // returns (text: string) => string; - -const loader = new RecursiveUrlLoader(url, { - extractor: compiledConvert, - maxDepth: 1, - excludeDirs: ["/docs/api/"], -}); - -const docs = await loader.load(); -``` - -## Options - -```typescript -interface Options { - excludeDirs?: string[]; // webpage directories to exclude. - extractor?: (text: string) => string; // a function to extract the text of the document from the webpage, by default it returns the page as it is. It is recommended to use tools like html-to-text to extract the text. By default, it just returns the page as it is. - maxDepth?: number; // the maximum depth to crawl. By default, it is set to 2. If you need to crawl the whole website, set it to a number that is large enough would simply do the job. - timeout?: number; // the timeout for each request, in the unit of seconds. By default, it is set to 10000 (10 seconds). - preventOutside?: boolean; // whether to prevent crawling outside the root url. 
By default, it is set to true. - callerOptions?: AsyncCallerConstructorParams; // the options to call the AsyncCaller for example setting max concurrency (default is 64) -} -``` - -However, since it's hard to perform a perfect filter, you may still see some irrelevant results in the results. You can perform a filter on the returned documents by yourself, if it's needed. Most of the time, the returned results are good enough. diff --git a/docs/core_docs/docs/integrations/document_loaders/web_loaders/web_cheerio.ipynb b/docs/core_docs/docs/integrations/document_loaders/web_loaders/web_cheerio.ipynb index b12e3a8e5a00..488205129b1d 100644 --- a/docs/core_docs/docs/integrations/document_loaders/web_loaders/web_cheerio.ipynb +++ b/docs/core_docs/docs/integrations/document_loaders/web_loaders/web_cheerio.ipynb @@ -5,7 +5,7 @@ "metadata": {}, "source": [ "---\n", - "sidebar_label: CheerioWebBaseLoader\n", + "sidebar_label: Cheerio\n", "---" ] }, @@ -36,8 +36,6 @@ "\n", "## Setup\n", "\n", - "- TODO: Update with relevant info.\n", - "\n", "To access `CheerioWebBaseLoader` document loader you'll need to install the `@langchain/community` integration package, along with the `cheerio` peer dependency.\n", "\n", "### Credentials\n", @@ -72,9 +70,7 @@ "source": [ "## Instantiation\n", "\n", - "Now we can instantiate our model object and load documents:\n", - "\n", - "- TODO: Update model instantiation with relevant params." 
+ "Now we can instantiate our model object and load documents:" ] }, { diff --git a/docs/core_docs/docs/integrations/document_loaders/web_loaders/web_puppeteer.ipynb b/docs/core_docs/docs/integrations/document_loaders/web_loaders/web_puppeteer.ipynb new file mode 100644 index 000000000000..332ded7ae820 --- /dev/null +++ b/docs/core_docs/docs/integrations/document_loaders/web_loaders/web_puppeteer.ipynb @@ -0,0 +1,543 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "---\n", + "sidebar_label: Puppeteer\n", + "sidebar_class_name: node-only\n", + "---" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# PuppeteerWebBaseLoader\n", + "\n", + "```{=mdx}\n", + ":::tip Compatibility\n", + "\n", + "Only available on Node.js.\n", + "\n", + ":::\n", + "```\n", + "\n", + "This notebook provides a quick overview for getting started with [PuppeteerWebBaseLoader](/docs/integrations/document_loaders/). For detailed documentation of all PuppeteerWebBaseLoader features and configurations head to the [API reference](https://api.js.langchain.com/classes/langchain_community_document_loaders_web_puppeteer.PuppeteerWebBaseLoader.html).\n", + "\n", + "Puppeteer is a Node.js library that provides a high-level API for controlling headless Chrome or Chromium. 
You can use Puppeteer to automate web page interactions, including extracting data from dynamic web pages that require JavaScript to render.\n", + "\n", + "If you want a lighter-weight solution, and the webpages you want to load do not require JavaScript to render, you can use the [CheerioWebBaseLoader](/docs/integrations/document_loaders/web_loaders/web_cheerio) instead.\n", + "\n", + "## Overview\n", + "### Integration details\n", + "\n", + "| Class | Package | Local | Serializable | PY support |\n", + "| :--- | :--- | :---: | :---: | :---: |\n", + "| [PuppeteerWebBaseLoader](https://api.js.langchain.com/classes/langchain_community_document_loaders_web_puppeteer.PuppeteerWebBaseLoader.html) | [@langchain/community](https://api.js.langchain.com/modules/langchain_community_document_loaders_web_puppeteer.html) | ✅ | beta | ❌ | \n", + "### Loader features\n", + "| Source | Web Loader | Node Envs Only\n", + "| :---: | :---: | :---: | \n", + "| PuppeteerWebBaseLoader | ✅ | ✅ | \n", + "\n", + "## Setup\n", + "\n", + "To access `PuppeteerWebBaseLoader` document loader you'll need to install the `@langchain/community` integration package, along with the `puppeteer` peer dependency.\n", + "\n", + "### Credentials\n", + "\n", + "If you want to get automated tracing of your model calls you can also set your [LangSmith](https://docs.smith.langchain.com/) API key by uncommenting below:\n", + "\n", + "```bash\n", + "# export LANGCHAIN_TRACING_V2=\"true\"\n", + "# export LANGCHAIN_API_KEY=\"your-api-key\"\n", + "```\n", + "\n", + "### Installation\n", + "\n", + "The LangChain PuppeteerWebBaseLoader integration lives in the `@langchain/community` package:\n", + "\n", + "```{=mdx}\n", + "import IntegrationInstallTooltip from \"@mdx_components/integration_install_tooltip.mdx\";\n", + "import Npm2Yarn from \"@theme/Npm2Yarn\";\n", + "\n", + "\n", + "\n", + "\n", + " @langchain/community puppeteer\n", + "\n", + "\n", + "```" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + 
"source": [ + "## Instantiation\n", + "\n", + "Now we can instantiate our model object and load documents:" + ] + }, + { + "cell_type": "code", + "execution_count": 1, + "metadata": {}, + "outputs": [], + "source": [ + "import { PuppeteerWebBaseLoader } from \"@langchain/community/document_loaders/web/puppeteer\"\n", + "\n", + "const loader = new PuppeteerWebBaseLoader(\"https://langchain.com\", {\n", + " // required params = ...\n", + " // optional params = ...\n", + "})" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Load" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Document {\n", + " pageContent: '
\\n' +\n", + " '\\n' +\n", + " ' Promise;\n", + "};\n", + "```\n", + "\n", + "1. `launchOptions`: an optional object that specifies additional options to pass to the puppeteer.launch() method. This can include options such as the headless flag to launch the browser in headless mode, or the slowMo option to slow down Puppeteer's actions to make them easier to follow.\n", + "\n", + "2. `gotoOptions`: an optional object that specifies additional options to pass to the page.goto() method. This can include options such as the timeout option to specify the maximum navigation time in milliseconds, or the waitUntil option to specify when to consider the navigation as successful.\n", + "\n", + "3. `evaluate`: an optional function that can be used to evaluate JavaScript code on the page using the page.evaluate() method. This can be useful for extracting data from the page or interacting with page elements. The function should return a Promise that resolves to a string containing the result of the evaluation.\n", + "\n", + "By passing these options to the `PuppeteerWebBaseLoader` constructor, you can customize the behavior of the loader and use Puppeteer's powerful features to scrape and interact with web pages.\n" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Screenshots\n", + "\n", + "To take a screenshot of a site, initialize the loader the same as above, and call the `.screenshot()` method.\n", + "This will return an instance of `Document` where the page content is a base64 encoded image, and the metadata contains a `source` field with the URL of the page." 
+ ] + }, + { + "cell_type": "code", + "execution_count": 7, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "iVBORw0KGgoAAAANSUhEUgAACWAAAAdoCAIAAAA/Q2IJAAAAAXNSR0IArs4c6QAAIABJREFUeJzsvUuzHUeSJuaPiMjMk3nOuU88\n", + "{ source: 'https://langchain.com' }\n" + ] + } + ], + "source": [ + "import { PuppeteerWebBaseLoader } from \"@langchain/community/document_loaders/web/puppeteer\";\n", + "\n", + "const loaderForScreenshot = new PuppeteerWebBaseLoader(\"https://langchain.com\", {\n", + " launchOptions: {\n", + " headless: true,\n", + " },\n", + " gotoOptions: {\n", + " waitUntil: \"domcontentloaded\",\n", + " },\n", + "});\n", + "const screenshot = await loaderForScreenshot.screenshot();\n", + "\n", + "console.log(screenshot.pageContent.slice(0, 100));\n", + "console.log(screenshot.metadata);" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## API reference\n", + "\n", + "For detailed documentation of all PuppeteerWebBaseLoader features and configurations head to the API reference: https://api.js.langchain.com/classes/langchain_community_document_loaders_web_puppeteer.PuppeteerWebBaseLoader.html" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [] + } + ], + "metadata": { + "kernelspec": { + "display_name": "TypeScript", + "language": "typescript", + "name": "tslab" + }, + "language_info": { + "codemirror_mode": { + "mode": "typescript", + "name": "javascript", + "typescript": true + }, + "file_extension": ".ts", + "mimetype": "text/typescript", + "name": "typescript", + "version": "3.7.2" + } + }, + "nbformat": 4, + "nbformat_minor": 4 +} diff --git a/docs/core_docs/docs/integrations/document_loaders/web_loaders/web_puppeteer.mdx b/docs/core_docs/docs/integrations/document_loaders/web_loaders/web_puppeteer.mdx deleted file mode 100644 index 8398a175eec8..000000000000 --- a/docs/core_docs/docs/integrations/document_loaders/web_loaders/web_puppeteer.mdx +++ /dev/null @@ 
-1,73 +0,0 @@ ---- -sidebar_position: 2 -sidebar_label: Puppeteer -hide_table_of_contents: true -sidebar_class_name: node-only ---- - -# Webpages, with Puppeteer - -:::tip Compatibility -Only available on Node.js. -::: - -This example goes over how to load data from webpages using Puppeteer. One document will be created for each webpage. - -Puppeteer is a Node.js library that provides a high-level API for controlling headless Chrome or Chromium. You can use Puppeteer to automate web page interactions, including extracting data from dynamic web pages that require JavaScript to render. - -If you want a lighterweight solution, and the webpages you want to load do not require JavaScript to render, you can use the [CheerioWebBaseLoader](/docs/integrations/document_loaders/web_loaders/web_cheerio) instead. - -## Setup - -```bash npm2yarn -npm install puppeteer -``` - -## Usage - -```typescript -import { PuppeteerWebBaseLoader } from "@langchain/community/document_loaders/web/puppeteer"; - -/** - * Loader uses `page.evaluate(() => document.body.innerHTML)` - * as default evaluate function - **/ -const loader = new PuppeteerWebBaseLoader("https://www.tabnews.com.br/"); - -const docs = await loader.load(); -``` - -## Options - -Here's an explanation of the parameters you can pass to the PuppeteerWebBaseLoader constructor using the PuppeteerWebBaseLoaderOptions interface: - -```typescript -type PuppeteerWebBaseLoaderOptions = { - launchOptions?: PuppeteerLaunchOptions; - gotoOptions?: PuppeteerGotoOptions; - evaluate?: (page: Page, browser: Browser) => Promise; -}; -``` - -1. `launchOptions`: an optional object that specifies additional options to pass to the puppeteer.launch() method. This can include options such as the headless flag to launch the browser in headless mode, or the slowMo option to slow down Puppeteer's actions to make them easier to follow. - -2. `gotoOptions`: an optional object that specifies additional options to pass to the page.goto() method. 
This can include options such as the timeout option to specify the maximum navigation time in milliseconds, or the waitUntil option to specify when to consider the navigation as successful. - -3. `evaluate`: an optional function that can be used to evaluate JavaScript code on the page using the page.evaluate() method. This can be useful for extracting data from the page or interacting with page elements. The function should return a Promise that resolves to a string containing the result of the evaluation. - -By passing these options to the `PuppeteerWebBaseLoader` constructor, you can customize the behavior of the loader and use Puppeteer's powerful features to scrape and interact with web pages. - -Here is a basic example to do it: - -import CodeBlock from "@theme/CodeBlock"; -import Example from "@examples/document_loaders/puppeteer_web.ts"; -import ScreenshotExample from "@examples/document_loaders/puppeteer_screenshot_web.ts"; - -{Example} - -### Screenshots - -To take a screenshot of a site, initialize the loader the same as above, and call the `.screenshot()` method. -This will return an instance of `Document` where the page content is a base64 encoded image, and the metadata contains a `source` field with the URL of the page. 
- -{ScreenshotExample} diff --git a/docs/core_docs/docs/integrations/llms/azure.ipynb b/docs/core_docs/docs/integrations/llms/azure.ipynb new file mode 100644 index 000000000000..607eeb2b6263 --- /dev/null +++ b/docs/core_docs/docs/integrations/llms/azure.ipynb @@ -0,0 +1,346 @@ +{ + "cells": [ + { + "cell_type": "raw", + "id": "67db2992", + "metadata": { + "vscode": { + "languageId": "raw" + } + }, + "source": [ + "---\n", + "sidebar_label: Azure OpenAI\n", + "---" + ] + }, + { + "cell_type": "markdown", + "id": "9597802c", + "metadata": {}, + "source": [ + "# AzureOpenAI\n", + "\n", + "```{=mdx}\n", + "\n", + ":::caution\n", + "You are currently on a page documenting the use of Azure OpenAI [text completion models](/docs/concepts/#llms). The latest and most popular Azure OpenAI models are [chat completion models](/docs/concepts/#chat-models).\n", + "\n", + "Unless you are specifically using `gpt-3.5-turbo-instruct`, you are probably looking for [this page instead](/docs/integrations/chat/azure/).\n", + ":::\n", + "\n", + ":::info\n", + "\n", + "Previously, LangChain.js supported integration with Azure OpenAI using the dedicated [Azure OpenAI SDK](https://github.com/Azure/azure-sdk-for-js/tree/main/sdk/openai/openai). This SDK is now deprecated in favor of the new Azure integration in the OpenAI SDK, which allows access to the latest OpenAI models and features the same day they are released, and allows seamless transition between the OpenAI API and Azure OpenAI.\n", + "\n", + "If you are using Azure OpenAI with the deprecated SDK, see the [migration guide](#migration-from-azure-openai-sdk) to update to the new API.\n", + "\n", + ":::\n", + "\n", + "```\n", + "\n", + "This will help you get started with AzureOpenAI completion models (LLMs) using LangChain. 
For detailed documentation on `AzureOpenAI` features and configuration options, please refer to the [API reference](https://api.js.langchain.com/classes/langchain_openai.AzureOpenAI.html).\n", + "\n", + "## Overview\n", + "### Integration details\n", + "\n", + "- TODO: Fill in table features.\n", + "- TODO: Remove JS support link if not relevant, otherwise ensure link is correct.\n", + "- TODO: Make sure API reference links are correct.\n", + "\n", + "| Class | Package | Local | Serializable | [PY support](https://python.langchain.com/docs/integrations/llms/openai) | Package downloads | Package latest |\n", + "| :--- | :--- | :---: | :---: | :---: | :---: | :---: |\n", + "| [AzureOpenAI](https://api.js.langchain.com/classes/langchain_openai.AzureOpenAI.html) | [@langchain/openai](https://api.js.langchain.com/modules/langchain_openai.html) | ❌ | ✅ | ✅ | ![NPM - Downloads](https://img.shields.io/npm/dm/@langchain/openai?style=flat-square&label=%20&) | ![NPM - Version](https://img.shields.io/npm/v/@langchain/openai?style=flat-square&label=%20&) |\n", + "\n", + "## Setup\n", + "\n", + "To access AzureOpenAI models you'll need to create an Azure account, get an API key, and install the `@langchain/openai` integration package.\n", + "\n", + "### Credentials\n", + "\n", + "Head to [azure.microsoft.com](https://azure.microsoft.com/) to sign up to AzureOpenAI and generate an API key. \n", + "\n", + "You'll also need to have an Azure OpenAI instance deployed. You can deploy a version on Azure Portal following [this guide](https://learn.microsoft.com/azure/ai-services/openai/how-to/create-resource?pivots=web-portal).\n", + "\n", + "Once you have your instance running, make sure you have the name of your instance and key. 
You can find the key in the Azure Portal, under the \"Keys and Endpoint\" section of your instance.\n", + "\n", + "If you're using Node.js, you can define the following environment variables to use the service:\n", + "\n", + "```bash\n", + "AZURE_OPENAI_API_INSTANCE_NAME=\n", + "AZURE_OPENAI_API_DEPLOYMENT_NAME=\n", + "AZURE_OPENAI_API_KEY=\n", + "AZURE_OPENAI_API_VERSION=\"2024-02-01\"\n", + "```\n", + "\n", + "Alternatively, you can pass the values directly to the `AzureOpenAI` constructor.\n", + "\n", + "If you want to get automated tracing of your model calls you can also set your [LangSmith](https://docs.smith.langchain.com/) API key by uncommenting below:\n", + "\n", + "```bash\n", + "# export LANGCHAIN_TRACING_V2=\"true\"\n", + "# export LANGCHAIN_API_KEY=\"your-api-key\"\n", + "```\n", + "\n", + "### Installation\n", + "\n", + "The LangChain AzureOpenAI integration lives in the `@langchain/openai` package:\n", + "\n", + "```{=mdx}\n", + "import IntegrationInstallTooltip from \"@mdx_components/integration_install_tooltip.mdx\";\n", + "import Npm2Yarn from \"@theme/Npm2Yarn\";\n", + "\n", + "\n", + "\n", + "\n", + " @langchain/openai\n", + "\n", + "\n", + "```" + ] + }, + { + "cell_type": "markdown", + "id": "0a760037", + "metadata": {}, + "source": [ + "## Instantiation\n", + "\n", + "Now we can instantiate our model object and generate chat completions:" + ] + }, + { + "cell_type": "code", + "execution_count": 7, + "id": "a0562a13", + "metadata": {}, + "outputs": [], + "source": [ + "import { AzureOpenAI } from \"@langchain/openai\"\n", + "\n", + "const llm = new AzureOpenAI({\n", + " model: \"gpt-3.5-turbo-instruct\",\n", + " azureOpenAIApiKey: \"\", // In Node.js defaults to process.env.AZURE_OPENAI_API_KEY\n", + " azureOpenAIApiInstanceName: \"\", // In Node.js defaults to process.env.AZURE_OPENAI_API_INSTANCE_NAME\n", + " azureOpenAIApiDeploymentName: \"\", // In Node.js defaults to process.env.AZURE_OPENAI_API_DEPLOYMENT_NAME\n", + " 
azureOpenAIApiVersion: \"\", // In Node.js defaults to process.env.AZURE_OPENAI_API_VERSION\n", + " temperature: 0,\n", + " maxTokens: undefined,\n", + " timeout: undefined,\n", + " maxRetries: 2,\n", + " // other params...\n", + "})" + ] + }, + { + "cell_type": "markdown", + "id": "0ee90032", + "metadata": {}, + "source": [ + "## Invocation" + ] + }, + { + "cell_type": "code", + "execution_count": 8, + "id": "035dea0f", + "metadata": { + "tags": [] + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "provides AI solutions to businesses. They offer a range of services including natural language processing, computer vision, and machine learning. Their solutions are designed to help businesses automate processes, gain insights from data, and improve decision-making. AzureOpenAI also offers consulting services to help businesses identify and implement the best AI solutions for their specific needs. They work with a variety of industries, including healthcare, finance, and retail. 
With their expertise in AI and their partnership with Microsoft Azure, AzureOpenAI is a trusted provider of AI solutions for businesses looking to stay ahead in the rapidly evolving world of technology.\n" + ] + } + ], + "source": [ + "const inputText = \"AzureOpenAI is an AI company that \"\n", + "\n", + "const completion = await llm.invoke(inputText)\n", + "completion" + ] + }, + { + "cell_type": "markdown", + "id": "add38532", + "metadata": {}, + "source": [ + "## Chaining\n", + "\n", + "We can [chain](/docs/how_to/sequence/) our completion model with a prompt template like so:" + ] + }, + { + "cell_type": "code", + "execution_count": 9, + "id": "078e9db2", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\n", + "Ich liebe Programmieren.\n" + ] + } + ], + "source": [ + "import { PromptTemplate } from \"@langchain/core/prompts\"\n", + "\n", + "const prompt = new PromptTemplate({\n", + " template: \"How to say {input} in {output_language}:\\n\",\n", + " inputVariables: [\"input\", \"output_language\"],\n", + "})\n", + "\n", + "const chain = prompt.pipe(llm);\n", + "await chain.invoke(\n", + " {\n", + " output_language: \"German\",\n", + " input: \"I love programming.\",\n", + " }\n", + ")" + ] + }, + { + "cell_type": "markdown", + "id": "e99eef30", + "metadata": {}, + "source": [ + "## Using Azure Managed Identity\n", + "\n", + "If you're using Azure Managed Identity, you can configure the credentials like this:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "c21d1eb8", + "metadata": {}, + "outputs": [], + "source": [ + "import {\n", + " DefaultAzureCredential,\n", + " getBearerTokenProvider,\n", + "} from \"@azure/identity\";\n", + "import { AzureOpenAI } from \"@langchain/openai\";\n", + "\n", + "const credentials = new DefaultAzureCredential();\n", + "const azureADTokenProvider = getBearerTokenProvider(\n", + " credentials,\n", + " \"https://cognitiveservices.azure.com/.default\"\n", + 
");\n", + "\n", + "const managedIdentityLLM = new AzureOpenAI({\n", + " azureADTokenProvider,\n", + " azureOpenAIApiInstanceName: \"\",\n", + " azureOpenAIApiDeploymentName: \"\",\n", + " azureOpenAIApiVersion: \"\",\n", + "});\n" + ] + }, + { + "cell_type": "markdown", + "id": "94c2572b", + "metadata": {}, + "source": [ + "## Using a different domain\n", + "\n", + "If your instance is hosted under a domain other than the default `openai.azure.com`, you'll need to use the alternate `AZURE_OPENAI_BASE_PATH` environment variable.\n", + "For example, here's how you would connect to the domain `https://westeurope.api.microsoft.com/openai/deployments/{DEPLOYMENT_NAME}`:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "bbf107a2", + "metadata": {}, + "outputs": [], + "source": [ + "import { AzureOpenAI } from \"@langchain/openai\";\n", + "\n", + "const differentDomainLLM = new AzureOpenAI({\n", + " azureOpenAIApiKey: \"\", // In Node.js defaults to process.env.AZURE_OPENAI_API_KEY\n", + " azureOpenAIApiDeploymentName: \"\", // In Node.js defaults to process.env.AZURE_OPENAI_API_DEPLOYMENT_NAME\n", + " azureOpenAIApiVersion: \"\", // In Node.js defaults to process.env.AZURE_OPENAI_API_VERSION\n", + " azureOpenAIBasePath:\n", + " \"https://westeurope.api.microsoft.com/openai/deployments\", // In Node.js defaults to process.env.AZURE_OPENAI_BASE_PATH\n", + "});\n" + ] + }, + { + "cell_type": "markdown", + "id": "afcff984", + "metadata": {}, + "source": [ + "## Migration from Azure OpenAI SDK\n", + "\n", + "If you are using the deprecated Azure OpenAI SDK with the `@langchain/azure-openai` package, you can update your code to use the new Azure integration following these steps:\n", + "\n", + "1. Install the new `@langchain/openai` package and remove the previous `@langchain/azure-openai` package:\n", + " ```bash\n", + " npm install @langchain/openai\n", + " npm uninstall @langchain/azure-openai\n", + " ```\n", + "2. 
Update your imports to use the new `AzureOpenAI` and `AzureChatOpenAI` classes from the `@langchain/openai` package:\n", + " ```typescript\n", + " import { AzureOpenAI } from \"@langchain/openai\";\n", + " ```\n", + "3. Update your code to use the new `AzureOpenAI` and `AzureChatOpenAI` classes and pass the required parameters:\n", + "\n", + " ```typescript\n", + " const model = new AzureOpenAI({\n", + " azureOpenAIApiKey: \"\",\n", + " azureOpenAIApiInstanceName: \"\",\n", + " azureOpenAIApiDeploymentName: \"\",\n", + " azureOpenAIApiVersion: \"\",\n", + " });\n", + " ```\n", + "\n", + " Notice that the constructor now requires the `azureOpenAIApiInstanceName` parameter instead of the `azureOpenAIEndpoint` parameter, and adds the `azureOpenAIApiVersion` parameter to specify the API version.\n", + "\n", + " - If you were using Azure Managed Identity, you now need to use the `azureADTokenProvider` parameter to the constructor instead of `credentials`, see the [Azure Managed Identity](#using-azure-managed-identity) section for more details.\n", + "\n", + " - If you were using environment variables, you now have to set the `AZURE_OPENAI_API_INSTANCE_NAME` environment variable instead of `AZURE_OPENAI_API_ENDPOINT`, and add the `AZURE_OPENAI_API_VERSION` environment variable to specify the API version.\n" + ] + }, + { + "cell_type": "markdown", + "id": "e9bdfcef", + "metadata": {}, + "source": [ + "## API reference\n", + "\n", + "For detailed documentation of all AzureOpenAI features and configurations head to the API reference: https://api.js.langchain.com/classes/langchain_openai.AzureOpenAI.html" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "TypeScript", + "language": "typescript", + "name": "tslab" + }, + "language_info": { + "codemirror_mode": { + "mode": "typescript", + "name": "javascript", + "typescript": true + }, + "file_extension": ".ts", + "mimetype": "text/typescript", + "name": "typescript", + "version": "3.7.2" + }, + "vscode": { + 
"interpreter": { + "hash": "e971737741ff4ec9aff7dc6155a1060a59a8a6d52c757dbbe66bf8ee389494b1" + } + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} diff --git a/docs/core_docs/docs/integrations/llms/azure.mdx b/docs/core_docs/docs/integrations/llms/azure.mdx deleted file mode 100644 index 36ed1b3b65ad..000000000000 --- a/docs/core_docs/docs/integrations/llms/azure.mdx +++ /dev/null @@ -1,123 +0,0 @@ -import CodeBlock from "@theme/CodeBlock"; - -# Azure OpenAI - -:::caution -You are currently on a page documenting the use of Azure OpenAI [text completion models](/docs/concepts/#llms). The latest and most popular Azure OpenAI models are [chat completion models](/docs/concepts/#chat-models). - -Unless you are specifically using `gpt-3.5-turbo-instruct`, you are probably looking for [this page instead](/docs/integrations/chat/azure/). -::: - -[Azure OpenAI](https://azure.microsoft.com/products/ai-services/openai-service/) is a cloud service to help you quickly develop generative AI experiences with a diverse set of prebuilt and curated models from OpenAI, Meta and beyond. - -LangChain.js supports integration with [Azure OpenAI](https://azure.microsoft.com/products/ai-services/openai-service/) using the new Azure integration in the [OpenAI SDK](https://github.com/openai/openai-node). - -You can learn more about Azure OpenAI and its difference with the OpenAI API on [this page](https://learn.microsoft.com/azure/ai-services/openai/overview). If you don't have an Azure account, you can [create a free account](https://azure.microsoft.com/free/) to get started. - -:::info - -Previously, LangChain.js supported integration with Azure OpenAI using the dedicated [Azure OpenAI SDK](https://github.com/Azure/azure-sdk-for-js/tree/main/sdk/openai/openai). 
This SDK is now deprecated in favor of the new Azure integration in the OpenAI SDK, which allows to access the latest OpenAI models and features the same day they are released, and allows seemless transition between the OpenAI API and Azure OpenAI. - -If you are using Azure OpenAI with the deprecated SDK, see the [migration guide](#migration-from-azure-openai-sdk) to update to the new API. - -::: - -## Setup - -You'll first need to install the [`@langchain/openai`](https://www.npmjs.com/package/@langchain/openai) package: - -import IntegrationInstallTooltip from "@mdx_components/integration_install_tooltip.mdx"; - - - -```bash npm2yarn -npm install -S @langchain/openai -``` - -You'll also need to have an Azure OpenAI instance deployed. You can deploy a version on Azure Portal following [this guide](https://learn.microsoft.com/azure/ai-services/openai/how-to/create-resource?pivots=web-portal). - -Once you have your instance running, make sure you have the name of your instance and key. You can find the key in the Azure Portal, under the "Keys and Endpoint" section of your instance. - -If you're using Node.js, you can define the following environment variables to use the service: - -```bash -AZURE_OPENAI_API_INSTANCE_NAME= -AZURE_OPENAI_API_DEPLOYMENT_NAME= -AZURE_OPENAI_API_KEY= -AZURE_OPENAI_API_VERSION="2024-02-01" -``` - -Alternatively, you can pass the values directly to the `AzureOpenAI` constructor: - -import AzureOpenAI from "@examples/models/llm/azure_openai.ts"; - -import UnifiedModelParamsTooltip from "@mdx_components/unified_model_params_tooltip.mdx"; - - - -{AzureOpenAI} - -:::info - -You can find the list of supported API versions in the [Azure OpenAI documentation](https://learn.microsoft.com/azure/ai-services/openai/reference). 
- -::: - -### Using Azure Managed Identity - -If you're using Azure Managed Identity, you can configure the credentials like this: - -import AzureOpenAIManagedIdentity from "@examples/models/llm/azure_openai-managed_identity.ts"; - -{AzureOpenAIManagedIdentity} - -### Using a different domain - -If your instance is hosted under a domain other than the default `openai.azure.com`, you'll need to use the alternate `AZURE_OPENAI_BASE_PATH` environment variable. -For example, here's how you would connect to the domain `https://westeurope.api.microsoft.com/openai/deployments/{DEPLOYMENT_NAME}`: - -import AzureOpenAIBasePath from "@examples/models/llm/azure_openai-base_path.ts"; - -{AzureOpenAIBasePath} - -### LLM usage example - -import LLMExample from "@examples/llms/azure_openai.ts"; - -{LLMExample} - -### Chat usage example - -import ChatExample from "@examples/llms/azure_openai-chat.ts"; - -{ChatExample} - -## Migration from Azure OpenAI SDK - -If you are using the deprecated Azure OpenAI SDK with the `@langchain/azure-openai` package, you can update your code to use the new Azure integration following these steps: - -1. Install the new `@langchain/openai` package and remove the previous `@langchain/azure-openai` package: - ```bash npm2yarn - npm install @langchain/openai - npm uninstall @langchain/azure-openai - ``` -2. Update your imports to use the new `AzureOpenAI` and `AzureChatOpenAI` classes from the `@langchain/openai` package: - ```typescript - import { AzureOpenAI } from "@langchain/openai"; - ``` -3. 
Update your code to use the new `AzureOpenAI` and `AzureChatOpenAI` classes and pass the required parameters: - - ```typescript - const model = new AzureOpenAI({ - azureOpenAIApiKey: "", - azureOpenAIApiInstanceName: "", - azureOpenAIApiDeploymentName: "", - azureOpenAIApiVersion: "", - }); - ``` - - Notice that the constructor now requires the `azureOpenAIApiInstanceName` parameter instead of the `azureOpenAIEndpoint` parameter, and adds the `azureOpenAIApiVersion` parameter to specify the API version. - - - If you were using Azure Managed Identity, you now need to use the `azureADTokenProvider` parameter to the constructor instead of `credentials`, see the [Azure Managed Identity](#using-azure-managed-identity) section for more details. - - - If you were using environment variables, you now have to set the `AZURE_OPENAI_API_INSTANCE_NAME` environment variable instead of `AZURE_OPENAI_API_ENDPOINT`, and add the `AZURE_OPENAI_API_VERSION` environment variable to specify the API version. diff --git a/docs/core_docs/docs/integrations/llms/bedrock.ipynb b/docs/core_docs/docs/integrations/llms/bedrock.ipynb new file mode 100644 index 000000000000..be1ff211a38e --- /dev/null +++ b/docs/core_docs/docs/integrations/llms/bedrock.ipynb @@ -0,0 +1,284 @@ +{ + "cells": [ + { + "cell_type": "raw", + "id": "67db2992", + "metadata": {}, + "source": [ + "---\n", + "sidebar_label: Bedrock\n", + "---" + ] + }, + { + "cell_type": "markdown", + "id": "9597802c", + "metadata": {}, + "source": [ + "# Bedrock\n", + "\n", + "```{=mdx}\n", + "\n", + ":::caution\n", + "You are currently on a page documenting the use of Amazon Bedrock models as [text completion models](/docs/concepts/#llms). 
Many popular models available on Bedrock are [chat completion models](/docs/concepts/#chat-models).\n", + "\n", + "You may be looking for [this page instead](/docs/integrations/chat/bedrock/).\n", + ":::\n", + "\n", + "```\n", + "\n", + "> [Amazon Bedrock](https://aws.amazon.com/bedrock/) is a fully managed service that makes Foundation Models (FMs)\n", + "> from leading AI startups and Amazon available via an API. You can choose from a wide range of FMs to find the model that is best suited for your use case.\n", + "\n", + "This will help you get started with Bedrock completion models (LLMs) using LangChain. For detailed documentation on `Bedrock` features and configuration options, please refer to the [API reference](https://api.js.langchain.com/classes/langchain_community_llms_bedrock.Bedrock.html).\n", + "\n", + "## Overview\n", + "### Integration details\n", + "\n", + "- TODO: Fill in table features.\n", + "- TODO: Remove JS support link if not relevant, otherwise ensure link is correct.\n", + "- TODO: Make sure API reference links are correct.\n", + "\n", + "| Class | Package | Local | Serializable | [PY support](https://python.langchain.com/docs/integrations/llms/bedrock) | Package downloads | Package latest |\n", + "| :--- | :--- | :---: | :---: | :---: | :---: | :---: |\n", + "| [Bedrock](https://api.js.langchain.com/classes/langchain_community_llms_bedrock.Bedrock.html) | [@langchain/community](https://api.js.langchain.com/modules/langchain_community_llms_bedrock.html) | ❌ | ✅ | ✅ | ![NPM - Downloads](https://img.shields.io/npm/dm/@langchain/community?style=flat-square&label=%20&) | ![NPM - Version](https://img.shields.io/npm/v/@langchain/community?style=flat-square&label=%20&) |\n", + "\n", + "## Setup\n", + "\n", + "To access Bedrock models you'll need to create an AWS account, get an API key, and install the `@langchain/community` integration, along with a few peer dependencies.\n", + "\n", + "### Credentials\n", + "\n", + "Head to 
[aws.amazon.com](https://aws.amazon.com) to sign up to AWS Bedrock and generate an API key. Once you've done this, set the environment variables:\n", + "\n", + "```bash\n", + "export BEDROCK_AWS_REGION=\"your-region\"\n", + "export BEDROCK_AWS_ACCESS_KEY_ID=\"your-access-key-id\"\n", + "export BEDROCK_AWS_SECRET_ACCESS_KEY=\"your-secret-access-key\"\n", + "```\n", + "\n", + "If you want to get automated tracing of your model calls you can also set your [LangSmith](https://docs.smith.langchain.com/) API key by uncommenting below:\n", + "\n", + "```bash\n", + "# export LANGCHAIN_TRACING_V2=\"true\"\n", + "# export LANGCHAIN_API_KEY=\"your-api-key\"\n", + "```\n", + "\n", + "### Installation\n", + "\n", + "The LangChain Bedrock integration lives in the `@langchain/community` package:\n", + "\n", + "```{=mdx}\n", + "import IntegrationInstallTooltip from \"@mdx_components/integration_install_tooltip.mdx\";\n", + "import Npm2Yarn from \"@theme/Npm2Yarn\";\n", + "\n", + "\n", + "\n", + "\n", + " @langchain/community\n", + "\n", + "\n", + "And install the peer dependencies:\n", + "\n", + "\n", + " @aws-crypto/sha256-js @aws-sdk/credential-provider-node @smithy/protocol-http @smithy/signature-v4 @smithy/eventstream-codec @smithy/util-utf8 @aws-sdk/types\n", + "\n", + "\n", + "You can also use Bedrock in web environments such as Edge functions or Cloudflare Workers by omitting the `@aws-sdk/credential-provider-node` dependency\n", + "and using the `web` entrypoint:\n", + "\n", + "\n", + " @aws-crypto/sha256-js @smithy/protocol-http @smithy/signature-v4 @smithy/eventstream-codec @smithy/util-utf8 @aws-sdk/types\n", + "\n", + "\n", + "```" + ] + }, + { + "cell_type": "markdown", + "id": "0a760037", + "metadata": {}, + "source": [ + "## Instantiation\n", + "\n", + "Now we can instantiate our model object and generate chat completions:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "093ae37f", + "metadata": {}, + "outputs": [], + "source": [ + "// 
@lc-docs-hide-cell\n", + "// Deno requires these imports, and way of loading env vars.\n", + "// we don't want to expose in the docs.\n", + "// Below this cell we have a typescript markdown codeblock with\n", + "// the node code.\n", + "import \"@aws-sdk/credential-provider-node\";\n", + "import \"@smithy/protocol-http\";\n", + "import \"@aws-crypto/sha256-js\";\n", + "import \"@smithy/protocol-http\";\n", + "import \"@smithy/signature-v4\";\n", + "import \"@smithy/eventstream-codec\";\n", + "import \"@smithy/util-utf8\";\n", + "import \"@aws-sdk/types\";\n", + "import { Bedrock } from \"@langchain/community/llms/bedrock\"\n", + "import { getEnvironmentVariable } from \"@langchain/core/utils/env\";\n", + "\n", + "const llm = new Bedrock({\n", + " model: \"anthropic.claude-v2\",\n", + " region: \"us-east-1\",\n", + " // endpointUrl: \"custom.amazonaws.com\",\n", + " credentials: {\n", + " accessKeyId: getEnvironmentVariable(\"BEDROCK_AWS_ACCESS_KEY_ID\"),\n", + " secretAccessKey: getEnvironmentVariable(\"BEDROCK_AWS_SECRET_ACCESS_KEY\"),\n", + " },\n", + " temperature: 0,\n", + " maxTokens: undefined,\n", + " maxRetries: 2,\n", + " // other params...\n", + "})" + ] + }, + { + "cell_type": "markdown", + "id": "a0562a13", + "metadata": {}, + "source": [ + "```typescript\n", + "import { Bedrock } from \"@langchain/community/llms/bedrock\"\n", + "\n", + "const llm = new Bedrock({\n", + " model: \"anthropic.claude-v2\",\n", + " region: process.env.BEDROCK_AWS_REGION ?? 
\"us-east-1\",\n", + " // endpointUrl: \"custom.amazonaws.com\",\n", + " credentials: {\n", + " accessKeyId: process.env.BEDROCK_AWS_ACCESS_KEY_ID,\n", + " secretAccessKey: process.env.BEDROCK_AWS_SECRET_ACCESS_KEY,\n", + " },\n", + " temperature: 0,\n", + " maxTokens: undefined,\n", + " maxRetries: 2,\n", + " // other params...\n", + "})\n", + "```" + ] + }, + { + "cell_type": "markdown", + "id": "0ee90032", + "metadata": {}, + "source": [ + "## Invocation\n", + "\n", + "Note that some models require specific prompting techniques. For example, Anthropic's Claude-v2 model will throw an error if\n", + "the prompt does not start with `Human: `." + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "id": "035dea0f", + "metadata": { + "tags": [] + }, + "outputs": [ + { + "data": { + "text/plain": [ + "\u001b[32m\" Here are a few key points about Bedrock AI:\\n\"\u001b[39m +\n", + " \u001b[32m\"\\n\"\u001b[39m +\n", + " \u001b[32m\"- Bedrock was founded in 2021 and is based in San Fran\"\u001b[39m... 
116 more characters" + ] + }, + "execution_count": 3, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "const inputText = \"Human: Bedrock is an AI company that\\nAssistant: \"\n", + "\n", + "const completion = await llm.invoke(inputText)\n", + "completion" + ] + }, + { + "cell_type": "markdown", + "id": "add38532", + "metadata": {}, + "source": [ + "## Chaining\n", + "\n", + "We can [chain](/docs/how_to/sequence/) our completion model with a prompt template like so:" + ] + }, + { + "cell_type": "code", + "execution_count": 21, + "id": "078e9db2", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "\u001b[32m' Here is how to say \"I love programming\" in German:\\n'\u001b[39m +\n", + " \u001b[32m\"\\n\"\u001b[39m +\n", + " \u001b[32m\"Ich liebe das Programmieren.\"\u001b[39m" + ] + }, + "execution_count": 21, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "import { PromptTemplate } from \"@langchain/core/prompts\"\n", + "\n", + "const prompt = PromptTemplate.fromTemplate(\"Human: How to say {input} in {output_language}:\\nAssistant:\")\n", + "\n", + "const chain = prompt.pipe(llm);\n", + "await chain.invoke(\n", + " {\n", + " output_language: \"German\",\n", + " input: \"I love programming.\",\n", + " }\n", + ")" + ] + }, + { + "cell_type": "markdown", + "id": "e9bdfcef", + "metadata": {}, + "source": [ + "## API reference\n", + "\n", + "For detailed documentation of all Bedrock features and configurations head to the API reference: https://api.js.langchain.com/classes/langchain_community_llms_bedrock.Bedrock.html" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Deno", + "language": "typescript", + "name": "deno" + }, + "language_info": { + "file_extension": ".ts", + "mimetype": "text/x.typescript", + "name": "typescript", + "nb_converter": "script", + "pygments_lexer": "typescript", + "version": "5.3.3" + }, + "vscode": { + "interpreter": { + "hash": 
"e971737741ff4ec9aff7dc6155a1060a59a8a6d52c757dbbe66bf8ee389494b1" + } + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} diff --git a/docs/core_docs/docs/integrations/llms/bedrock.mdx b/docs/core_docs/docs/integrations/llms/bedrock.mdx deleted file mode 100644 index 12627060a129..000000000000 --- a/docs/core_docs/docs/integrations/llms/bedrock.mdx +++ /dev/null @@ -1,43 +0,0 @@ -# Bedrock - -:::caution -You are currently on a page documenting the use of Amazon Bedrock models as [text completion models](/docs/concepts/#llms). Many popular models available on Bedrock are [chat completion models](/docs/concepts/#chat-models). - -You may be looking for [this page instead](/docs/integrations/chat/bedrock/). -::: - -> [Amazon Bedrock](https://aws.amazon.com/bedrock/) is a fully managed service that makes Foundation Models (FMs) -> from leading AI startups and Amazon available via an API. You can choose from a wide range of FMs to find the model that is best suited for your use case. - -## Setup - -You'll need to install a few official AWS packages as peer dependencies: - -```bash npm2yarn -npm install @aws-crypto/sha256-js @aws-sdk/credential-provider-node @smithy/protocol-http @smithy/signature-v4 @smithy/eventstream-codec @smithy/util-utf8 @aws-sdk/types -``` - -You can also use Bedrock in web environments such as Edge functions or Cloudflare Workers by omitting the `@aws-sdk/credential-provider-node` dependency -and using the `web` entrypoint: - -```bash npm2yarn -npm install @aws-crypto/sha256-js @smithy/protocol-http @smithy/signature-v4 @smithy/eventstream-codec @smithy/util-utf8 @aws-sdk/types -``` - -## Usage - -import IntegrationInstallTooltip from "@mdx_components/integration_install_tooltip.mdx"; - - - -```bash npm2yarn -npm install @langchain/community -``` - -Note that some models require specific prompting techniques. For example, Anthropic's Claude-v2 model will throw an error if -the prompt does not start with `Human: `. 
- -import CodeBlock from "@theme/CodeBlock"; -import BedrockExample from "@examples/models/llm/bedrock.ts"; - -{BedrockExample} diff --git a/docs/core_docs/docs/integrations/llms/cohere.ipynb b/docs/core_docs/docs/integrations/llms/cohere.ipynb new file mode 100644 index 000000000000..054d857e7aca --- /dev/null +++ b/docs/core_docs/docs/integrations/llms/cohere.ipynb @@ -0,0 +1,225 @@ +{ + "cells": [ + { + "cell_type": "raw", + "id": "67db2992", + "metadata": {}, + "source": [ + "---\n", + "sidebar_label: Cohere\n", + "---" + ] + }, + { + "cell_type": "markdown", + "id": "9597802c", + "metadata": {}, + "source": [ + "# Cohere\n", + "\n", + "```{=mdx}\n", + "\n", + ":::warning Legacy\n", + "\n", + "Cohere has marked their `generate` endpoint for LLMs as deprecated. Follow their [migration guide](https://docs.cohere.com/docs/migrating-from-cogenerate-to-cochat) to start using their Chat API via the [`ChatCohere`](/docs/integrations/chat/cohere) integration.\n", + "\n", + ":::\n", + "\n", + ":::caution\n", + "You are currently on a page documenting the use of Cohere models as [text completion models](/docs/concepts/#llms). Many popular models available on Cohere are [chat completion models](/docs/concepts/#chat-models).\n", + "\n", + "You may be looking for [this page instead](/docs/integrations/chat/cohere/).\n", + ":::\n", + "\n", + "```\n", + "\n", + "This will help you get started with Cohere completion models (LLMs) using LangChain. 
For detailed documentation on `Cohere` features and configuration options, please refer to the [API reference](https://api.js.langchain.com/classes/langchain_cohere.Cohere.html).\n", + "\n", + "## Overview\n", + "### Integration details\n", + "\n", + "| Class | Package | Local | Serializable | [PY support](https://python.langchain.com/docs/integrations/llms/cohere) | Package downloads | Package latest |\n", + "| :--- | :--- | :---: | :---: | :---: | :---: | :---: |\n", + "| [Cohere](https://api.js.langchain.com/classes/langchain_cohere.Cohere.html) | [@langchain/cohere](https://api.js.langchain.com/modules/langchain_cohere.html) | ❌ | ✅ | ✅ | ![NPM - Downloads](https://img.shields.io/npm/dm/@langchain/cohere?style=flat-square&label=%20&) | ![NPM - Version](https://img.shields.io/npm/v/@langchain/cohere?style=flat-square&label=%20&) |\n", + "\n", + "## Setup\n", + "\n", + "To access Cohere models you'll need to create a Cohere account, get an API key, and install the `@langchain/cohere` integration package.\n", + "\n", + "### Credentials\n", + "\n", + "Head to [cohere.com](https://cohere.com) to sign up to Cohere and generate an API key. 
Once you've done this set the `COHERE_API_KEY` environment variable:\n", + "\n", + "```bash\n", + "export COHERE_API_KEY=\"your-api-key\"\n", + "```\n", + "\n", + "If you want to get automated tracing of your model calls you can also set your [LangSmith](https://docs.smith.langchain.com/) API key by uncommenting below:\n", + "\n", + "```bash\n", + "# export LANGCHAIN_TRACING_V2=\"true\"\n", + "# export LANGCHAIN_API_KEY=\"your-api-key\"\n", + "```\n", + "\n", + "### Installation\n", + "\n", + "The LangChain Cohere integration lives in the `@langchain/cohere` package:\n", + "\n", + "```{=mdx}\n", + "import IntegrationInstallTooltip from \"@mdx_components/integration_install_tooltip.mdx\";\n", + "import Npm2Yarn from \"@theme/Npm2Yarn\";\n", + "\n", + "\n", + "\n", + "\n", + " @langchain/cohere\n", + "\n", + "\n", + "```" + ] + }, + { + "cell_type": "markdown", + "id": "0a760037", + "metadata": {}, + "source": [ + "## Instantiation\n", + "\n", + "Now we can instantiate our model object and generate chat completions:" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "id": "a0562a13", + "metadata": {}, + "outputs": [], + "source": [ + "import { Cohere } from \"@langchain/cohere\"\n", + "\n", + "const llm = new Cohere({\n", + " model: \"command\",\n", + " temperature: 0,\n", + " maxTokens: undefined,\n", + " maxRetries: 2,\n", + " // other params...\n", + "})" + ] + }, + { + "cell_type": "markdown", + "id": "0ee90032", + "metadata": {}, + "source": [ + "## Invocation" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "id": "035dea0f", + "metadata": { + "tags": [] + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Cohere is a company that provides natural language processing models that help companies improve human-machine interactions. Cohere was founded in 2019 by Aidan Gomez, Ivan Zhang, and Nick Frosst. 
\n" + ] + } + ], + "source": [ + "const inputText = \"Cohere is an AI company that \"\n", + "\n", + "const completion = await llm.invoke(inputText)\n", + "completion" + ] + }, + { + "cell_type": "markdown", + "id": "add38532", + "metadata": {}, + "source": [ + "## Chaining\n", + "\n", + "We can [chain](/docs/how_to/sequence/) our completion model with a prompt template like so:" + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "id": "078e9db2", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + " Ich liebe Programming.\n", + "\n", + "But for day to day purposes Ich mag Programming. would be enough and perfectly understood.\n", + "\n", + "I love programming is \"Ich liebe Programming\" and I like programming is \"Ich mag Programming\" respectively.\n", + "\n", + "There are also other ways to express this feeling, such as \"Ich habe Spaß mit Programming\", which means \"I enjoy programming\". But \"Ich mag\" and \"Ich liebe\" are the most common expressions for this.\n", + "\n", + "Let me know if I can be of further help with something else! 
\n" + ] + } + ], + "source": [ + "import { PromptTemplate } from \"@langchain/core/prompts\"\n", + "\n", + "const prompt = new PromptTemplate({\n", + " template: \"How to say {input} in {output_language}:\\n\",\n", + " inputVariables: [\"input\", \"output_language\"],\n", + "})\n", + "\n", + "const chain = prompt.pipe(llm);\n", + "await chain.invoke(\n", + " {\n", + " output_language: \"German\",\n", + " input: \"I love programming.\",\n", + " }\n", + ")" + ] + }, + { + "cell_type": "markdown", + "id": "e9bdfcef", + "metadata": {}, + "source": [ + "## API reference\n", + "\n", + "For detailed documentation of all Cohere features and configurations head to the API reference: https://api.js.langchain.com/classes/langchain_cohere.Cohere.html" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "TypeScript", + "language": "typescript", + "name": "tslab" + }, + "language_info": { + "codemirror_mode": { + "mode": "typescript", + "name": "javascript", + "typescript": true + }, + "file_extension": ".ts", + "mimetype": "text/typescript", + "name": "typescript", + "version": "3.7.2" + }, + "vscode": { + "interpreter": { + "hash": "e971737741ff4ec9aff7dc6155a1060a59a8a6d52c757dbbe66bf8ee389494b1" + } + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} diff --git a/docs/core_docs/docs/integrations/llms/cohere.mdx b/docs/core_docs/docs/integrations/llms/cohere.mdx deleted file mode 100644 index 3085c31a2332..000000000000 --- a/docs/core_docs/docs/integrations/llms/cohere.mdx +++ /dev/null @@ -1,25 +0,0 @@ -# Cohere - -:::caution -You are currently on a page documenting the use of Cohere models as [text completion models](/docs/concepts/#llms). Many popular models available on Cohere are [chat completion models](/docs/concepts/#chat-models). - -You may be looking for [this page instead](/docs/integrations/chat/cohere/). -::: - -import CodeBlock from "@theme/CodeBlock"; - -LangChain.js supports Cohere LLMs. 
Here's an example: - -You'll first need to install the [`@langchain/cohere`](https://www.npmjs.com/package/@langchain/cohere) package. - -import IntegrationInstallTooltip from "@mdx_components/integration_install_tooltip.mdx"; - - - -```bash npm2yarn -npm install @langchain/cohere -``` - -import BasicExample from "@examples/models/llm/cohere.ts"; - -{BasicExample} diff --git a/docs/core_docs/docs/integrations/llms/fireworks.ipynb b/docs/core_docs/docs/integrations/llms/fireworks.ipynb new file mode 100644 index 000000000000..ead90271e243 --- /dev/null +++ b/docs/core_docs/docs/integrations/llms/fireworks.ipynb @@ -0,0 +1,279 @@ +{ + "cells": [ + { + "cell_type": "raw", + "id": "67db2992", + "metadata": {}, + "source": [ + "---\n", + "sidebar_label: Fireworks\n", + "---" + ] + }, + { + "cell_type": "markdown", + "id": "9597802c", + "metadata": {}, + "source": [ + "# Fireworks\n", + "\n", + "\n", + "```{=mdx}\n", + "\n", + ":::caution\n", + "You are currently on a page documenting the use of Fireworks models as [text completion models](/docs/concepts/#llms). Many popular models available on Fireworks are [chat completion models](/docs/concepts/#chat-models).\n", + "\n", + "You may be looking for [this page instead](/docs/integrations/chat/fireworks/).\n", + ":::\n", + "\n", + "```\n", + "\n", + "This will help you get started with Fireworks completion models (LLMs) using LangChain. 
For detailed documentation on `Fireworks` features and configuration options, please refer to the [API reference](https://api.js.langchain.com/classes/langchain_community_llms_fireworks.Fireworks.html).\n", + "\n", + "## Overview\n", + "### Integration details\n", + "\n", + "| Class | Package | Local | Serializable | [PY support](https://python.langchain.com/docs/integrations/llms/fireworks) | Package downloads | Package latest |\n", + "| :--- | :--- | :---: | :---: | :---: | :---: | :---: |\n", + "| [Fireworks](https://api.js.langchain.com/classes/langchain_community_llms_fireworks.Fireworks.html) | [@langchain/community](https://api.js.langchain.com/modules/langchain_community_llms_fireworks.html) | ❌ | ✅ | ✅ | ![NPM - Downloads](https://img.shields.io/npm/dm/@langchain/community?style=flat-square&label=%20&) | ![NPM - Version](https://img.shields.io/npm/v/@langchain/community?style=flat-square&label=%20&) |\n", + "\n", + "## Setup\n", + "\n", + "To access Fireworks models you'll need to create a Fireworks account, get an API key, and install the `@langchain/community` integration package.\n", + "\n", + "### Credentials\n", + "\n", + "Head to [fireworks.ai](https://fireworks.ai/) to sign up to Fireworks and generate an API key. 
Once you've done this set the `FIREWORKS_API_KEY` environment variable:\n", + "\n", + "```bash\n", + "export FIREWORKS_API_KEY=\"your-api-key\"\n", + "```\n", + "\n", + "If you want to get automated tracing of your model calls you can also set your [LangSmith](https://docs.smith.langchain.com/) API key by uncommenting below:\n", + "\n", + "```bash\n", + "# export LANGCHAIN_TRACING_V2=\"true\"\n", + "# export LANGCHAIN_API_KEY=\"your-api-key\"\n", + "```\n", + "\n", + "### Installation\n", + "\n", + "The LangChain Fireworks integration lives in the `@langchain/community` package:\n", + "\n", + "```{=mdx}\n", + "import IntegrationInstallTooltip from \"@mdx_components/integration_install_tooltip.mdx\";\n", + "import Npm2Yarn from \"@theme/Npm2Yarn\";\n", + "\n", + "\n", + "\n", + "\n", + " @langchain/community\n", + "\n", + "\n", + "```" + ] + }, + { + "cell_type": "markdown", + "id": "0a760037", + "metadata": {}, + "source": [ + "## Instantiation\n", + "\n", + "Now we can instantiate our model object and generate chat completions:" + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "id": "a0562a13", + "metadata": {}, + "outputs": [], + "source": [ + "import { Fireworks } from \"@langchain/community/llms/fireworks\"\n", + "\n", + "const llm = new Fireworks({\n", + " model: \"accounts/fireworks/models/llama-v3-70b-instruct\",\n", + " temperature: 0,\n", + " maxTokens: undefined,\n", + " timeout: undefined,\n", + " maxRetries: 2,\n", + " // other params...\n", + "})" + ] + }, + { + "cell_type": "markdown", + "id": "0ee90032", + "metadata": {}, + "source": [ + "## Invocation" + ] + }, + { + "cell_type": "code", + "execution_count": 6, + "id": "035dea0f", + "metadata": { + "tags": [] + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + " helps businesses automate their workflows and processes using AI and machine learning. 
Our platform provides a suite of tools that enable companies to automate repetitive tasks, extract insights from data, and make predictions about future outcomes.\n", + "\n", + "We're looking for a talented and motivated **Machine Learning Engineer** to join our team. As a Machine Learning Engineer at Fireworks, you will be responsible for designing, developing, and deploying machine learning models that drive business value for our customers. You will work closely with our data science team to develop and improve our AI models, and collaborate with our engineering team to integrate these models into our platform.\n", + "\n", + "**Responsibilities:**\n", + "\n", + "* Design, develop, and deploy machine learning models that drive business value for our customers\n", + "* Collaborate with data scientists to develop and improve AI models\n", + "* Work with the engineering team to integrate machine learning models into our platform\n", + "* Develop and maintain scalable and efficient machine learning pipelines\n", + "* Stay up-to-date with the latest developments in machine learning and AI\n", + "* Communicate complex technical concepts to non-technical stakeholders\n", + "\n", + "**Requirements:**\n", + "\n", + "* Bachelor's or Master's degree in Computer Science, Machine Learning, or a related field\n", + "* 3+ years of experience in machine learning engineering\n", + "* Strong programming skills in Python and experience with machine learning frameworks\n" + ] + } + ], + "source": [ + "const inputText = \"Fireworks is an AI company that \"\n", + "\n", + "const completion = await llm.invoke(inputText)\n", + "completion" + ] + }, + { + "cell_type": "markdown", + "id": "add38532", + "metadata": {}, + "source": [ + "## Chaining\n", + "\n", + "We can [chain](/docs/how_to/sequence/) our completion model with a prompt template like so:" + ] + }, + { + "cell_type": "code", + "execution_count": 8, + "id": "078e9db2", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + 
"output_type": "stream", + "text": [ + "Ich liebe Programmieren.\n", + "\n", + "How to say I love coding. in German:\n", + "Ich liebe Coden.\n", + "\n", + "How to say I love to code. in German:\n", + "Ich liebe es zu coden.\n", + "\n", + "How to say I'm a programmer. in German:\n", + "Ich bin Programmierer.\n", + "\n", + "How to say I'm a coder. in German:\n", + "Ich bin Coder.\n", + "\n", + "How to say I'm a developer. in German:\n", + "Ich bin Entwickler.\n", + "\n", + "How to say I'm a software engineer. in German:\n", + "Ich bin Software-Ingenieur.\n", + "\n", + "How to say I'm a tech enthusiast. in German:\n", + "Ich bin Technik-Enthusiast.\n", + "\n", + "How to say I'm passionate about technology. in German:\n", + "Ich bin leidenschaftlich für Technologie.\n", + "\n", + "How to say I'm passionate about coding. in German:\n", + "Ich bin leidenschaftlich für Coden.\n", + "\n", + "How to say I'm passionate about programming. in German:\n", + "Ich bin leidenschaftlich für Programmieren.\n", + "\n", + "How to say I enjoy coding. in German:\n", + "Ich genieße Coden.\n", + "\n", + "How to say I enjoy programming. in German:\n", + "Ich genieße Programmieren.\n", + "\n", + "How to say I'm good at coding. in German:\n", + "Ich bin gut im Coden.\n", + "\n", + "How to say I'm good at programming. 
in\n" + ] + } + ], + "source": [ + "import { PromptTemplate } from \"@langchain/core/prompts\"\n", + "\n", + "const prompt = PromptTemplate.fromTemplate(\"How to say {input} in {output_language}:\\n\")\n", + "\n", + "const chain = prompt.pipe(llm);\n", + "await chain.invoke(\n", + " {\n", + " output_language: \"German\",\n", + " input: \"I love programming.\",\n", + " }\n", + ")" + ] + }, + { + "cell_type": "markdown", + "id": "4989353f", + "metadata": {}, + "source": [ + "Behind the scenes, Fireworks AI uses the OpenAI SDK and OpenAI compatible API, with some caveats:\n", + "\n", + "- Certain properties are not supported by the Fireworks API, see [here](https://readme.fireworks.ai/docs/openai-compatibility#api-compatibility).\n", + "- Generation using multiple prompts is not supported.\n" + ] + }, + { + "cell_type": "markdown", + "id": "e9bdfcef", + "metadata": {}, + "source": [ + "## API reference\n", + "\n", + "For detailed documentation of all Fireworks features and configurations head to the API reference: https://api.js.langchain.com/classes/langchain_community_llms_fireworks.Fireworks.html" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "TypeScript", + "language": "typescript", + "name": "tslab" + }, + "language_info": { + "codemirror_mode": { + "mode": "typescript", + "name": "javascript", + "typescript": true + }, + "file_extension": ".ts", + "mimetype": "text/typescript", + "name": "typescript", + "version": "3.7.2" + }, + "vscode": { + "interpreter": { + "hash": "e971737741ff4ec9aff7dc6155a1060a59a8a6d52c757dbbe66bf8ee389494b1" + } + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} diff --git a/docs/core_docs/docs/integrations/llms/fireworks.mdx b/docs/core_docs/docs/integrations/llms/fireworks.mdx deleted file mode 100644 index 8548ef1e64f7..000000000000 --- a/docs/core_docs/docs/integrations/llms/fireworks.mdx +++ /dev/null @@ -1,32 +0,0 @@ ---- -sidebar_label: Fireworks ---- - -import CodeBlock from "@theme/CodeBlock"; - -# Fireworks 
- -:::caution -You are currently on a page documenting the use of Fireworks models as [text completion models](/docs/concepts/#llms). Many popular models available on Fireworks are [chat completion models](/docs/concepts/#chat-models). - -You may be looking for [this page instead](/docs/integrations/chat/fireworks/). -::: - -import IntegrationInstallTooltip from "@mdx_components/integration_install_tooltip.mdx"; - - - -```bash npm2yarn -npm install @langchain/community -``` - -You can use models provided by Fireworks AI as follows: - -import Fireworks from "@examples/models/llm/fireworks.ts"; - -{Fireworks} - -Behind the scenes, Fireworks AI uses the OpenAI SDK and OpenAI compatible API, with some caveats: - -- Certain properties are not supported by the Fireworks API, see [here](https://readme.fireworks.ai/docs/openai-compatibility#api-compatibility). -- Generation using multiple prompts is not supported. diff --git a/docs/core_docs/docs/integrations/llms/google_vertex_ai.ipynb b/docs/core_docs/docs/integrations/llms/google_vertex_ai.ipynb new file mode 100644 index 000000000000..314ba75c540e --- /dev/null +++ b/docs/core_docs/docs/integrations/llms/google_vertex_ai.ipynb @@ -0,0 +1,279 @@ +{ + "cells": [ + { + "cell_type": "raw", + "id": "67db2992", + "metadata": { + "vscode": { + "languageId": "raw" + } + }, + "source": [ + "---\n", + "sidebar_label: Google Vertex AI\n", + "---" + ] + }, + { + "cell_type": "markdown", + "id": "9597802c", + "metadata": {}, + "source": [ + "# Google Vertex AI\n", + "\n", + "```{=mdx}\n", + "\n", + ":::caution\n", + "You are currently on a page documenting the use of Google Vertex models as [text completion models](/docs/concepts/#llms). 
Many popular models available on Google Vertex are [chat completion models](/docs/concepts/#chat-models).\n", + "\n", + "You may be looking for [this page instead](/docs/integrations/chat/google_vertex_ai/).\n", + ":::\n", + "\n", + "```\n", + "\n", + "LangChain.js supports two different authentication methods based on whether\n", + "you're running in a Node.js environment or a web environment.\n", + "\n", + "This will help you get started with VertexAI completion models (LLMs) using LangChain. For detailed documentation on `VertexAI` features and configuration options, please refer to the [API reference](https://api.js.langchain.com/classes/langchain_google_vertexai.VertexAI.html).\n", + "\n", + "## Overview\n", + "### Integration details\n", + "\n", + "| Class | Package | Local | Serializable | [PY support](https://python.langchain.com/docs/integrations/llms/google_vertex_ai_palm) | Package downloads | Package latest |\n", + "| :--- | :--- | :---: | :---: | :---: | :---: | :---: |\n", + "| [VertexAI](https://api.js.langchain.com/classes/langchain_google_vertexai.VertexAI.html) | [@langchain/google-vertexai](https://api.js.langchain.com/modules/langchain_google_vertexai.html) | ❌ | ✅ | ✅ | ![NPM - Downloads](https://img.shields.io/npm/dm/@langchain/google-vertexai?style=flat-square&label=%20&) | ![NPM - Version](https://img.shields.io/npm/v/@langchain/google-vertexai?style=flat-square&label=%20&) |\n", + "\n", + "## Setup\n", + "\n", + "To access VertexAI models you'll need to create a Google Cloud Platform (GCP) account, get an API key, and install the `@langchain/google-vertexai` integration package.\n", + "\n", + "### Credentials\n", + "\n", + "#### Node.js\n", + "\n", + "You should make sure the Vertex AI API is\n", + "enabled for the relevant project and that you've authenticated to\n", + "Google Cloud using one of these methods:\n", + "\n", + "- You are logged into an account (using `gcloud auth application-default login`)\n", + " permitted to that 
project.\n", + "- You are running on a machine using a service account that is permitted\n", + " to the project.\n", + "- You have downloaded the credentials for a service account that is permitted\n", + " to the project and set the `GOOGLE_APPLICATION_CREDENTIALS` environment\n", + " variable to the path of this file.\n", + " **or**\n", + "- You set the `GOOGLE_API_KEY` environment variable to the API key for the project.\n", + "\n", + "#### Web\n", + "\n", + "To call Vertex AI models in web environments (like Edge functions), you'll need to install\n", + "the `@langchain/google-vertexai-web` package.\n", + "\n", + "Then, you'll need to add your service account credentials directly as a `GOOGLE_VERTEX_AI_WEB_CREDENTIALS` environment variable:\n", + "\n", + "```\n", + "GOOGLE_VERTEX_AI_WEB_CREDENTIALS={\"type\":\"service_account\",\"project_id\":\"YOUR_PROJECT-12345\",...}\n", + "```\n", + "\n", + "You can also pass your credentials directly in code like this:\n", + "\n", + "```typescript\n", + "import { VertexAI } from \"@langchain/google-vertexai\";\n", + "// Or uncomment this line if you're using the web version:\n", + "// import { VertexAI } from \"@langchain/google-vertexai-web\";\n", + "\n", + "const model = new VertexAI({\n", + " authOptions: {\n", + " credentials: {\"type\":\"service_account\",\"project_id\":\"YOUR_PROJECT-12345\",...},\n", + " },\n", + "});\n", + "```\n", + "\n", + "If you want to get automated tracing of your model calls you can also set your [LangSmith](https://docs.smith.langchain.com/) API key by uncommenting below:\n", + "\n", + "```bash\n", + "# export LANGCHAIN_TRACING_V2=\"true\"\n", + "# export LANGCHAIN_API_KEY=\"your-api-key\"\n", + "```\n", + "\n", + "### Installation\n", + "\n", + "The LangChain VertexAI integration lives in the `@langchain/google-vertexai` package:\n", + "\n", + "```{=mdx}\n", + "import IntegrationInstallTooltip from \"@mdx_components/integration_install_tooltip.mdx\";\n", + "import Npm2Yarn from 
\"@theme/Npm2Yarn\";\n", + "\n", + "\n", + "\n", + "\n", + " @langchain/google-vertexai\n", + "\n", + "\n", + "or for web environments:\n", + "\n", + "\n", + " @langchain/google-vertexai-web\n", + "\n", + "\n", + "```" + ] + }, + { + "cell_type": "markdown", + "id": "0a760037", + "metadata": {}, + "source": [ + "## Instantiation\n", + "\n", + "Now we can instantiate our model object and generate chat completions:" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "id": "a0562a13", + "metadata": {}, + "outputs": [], + "source": [ + "import { VertexAI } from \"@langchain/google-vertexai-web\"\n", + "\n", + "const llm = new VertexAI({\n", + " model: \"gemini-pro\",\n", + " temperature: 0,\n", + " maxRetries: 2,\n", + " // other params...\n", + "})" + ] + }, + { + "cell_type": "markdown", + "id": "0ee90032", + "metadata": {}, + "source": [ + "## Invocation" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "035dea0f", + "metadata": { + "tags": [] + }, + "outputs": [], + "source": [ + "const inputText = \"VertexAI is an AI company that \"\n", + "\n", + "const completion = await llm.invoke(inputText)\n", + "completion" + ] + }, + { + "cell_type": "markdown", + "id": "f580765e", + "metadata": { + "vscode": { + "languageId": "raw" + } + }, + "source": [ + "```txt\n", + "offers a wide range of cloud computing services and artificial intelligence solutions to businesses and developers worldwide.\n", + "```" + ] + }, + { + "cell_type": "markdown", + "id": "add38532", + "metadata": {}, + "source": [ + "## Chaining\n", + "\n", + "We can [chain](/docs/how_to/sequence/) our completion model with a prompt template like so:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "078e9db2", + "metadata": {}, + "outputs": [], + "source": [ + "import { PromptTemplate } from \"@langchain/core/prompts\"\n", + "\n", + "const prompt = PromptTemplate.fromTemplate(\"How to say {input} in {output_language}:\\n\")\n", + "\n", + "const chain = 
prompt.pipe(llm);\n", + "await chain.invoke(\n", + " {\n", + " output_language: \"German\",\n", + " input: \"I love programming.\",\n", + " }\n", + ")" + ] + }, + { + "cell_type": "markdown", + "id": "4d106b41", + "metadata": { + "vscode": { + "languageId": "raw" + } + }, + "source": [ + "```txt\n", + "\"Ich liebe Programmieren.\"\n", + "Pronunciation guide:\n", + "\n", + "Ich: [ɪç] (similar to \"ikh\" with a soft \"ch\" sound)\n", + "liebe: [ˈliːbə] (LEE-buh)\n", + "Programmieren: [pʁoɡʁaˈmiːʁən] (pro-gra-MEE-ren)\n", + "\n", + "You could also say:\n", + "\"Ich liebe es zu programmieren.\"\n", + "Which translates more literally to \"I love to program.\" This version is a bit more formal or precise.\n", + "Pronunciation:\n", + "\n", + "es: [ɛs] (like the letter \"S\")\n", + "zu: [tsuː] (tsoo)\n", + "\n", + "Both versions are correct and commonly used.\n", + "```" + ] + }, + { + "cell_type": "markdown", + "id": "e9bdfcef", + "metadata": {}, + "source": [ + "## API reference\n", + "\n", + "For detailed documentation of all VertexAI features and configurations head to the API reference: https://api.js.langchain.com/classes/langchain_google_vertexai.VertexAI.html" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Deno", + "language": "typescript", + "name": "deno" + }, + "language_info": { + "file_extension": ".ts", + "mimetype": "text/x.typescript", + "name": "typescript", + "nb_converter": "script", + "pygments_lexer": "typescript", + "version": "5.3.3" + }, + "vscode": { + "interpreter": { + "hash": "e971737741ff4ec9aff7dc6155a1060a59a8a6d52c757dbbe66bf8ee389494b1" + } + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} diff --git a/docs/core_docs/docs/integrations/llms/google_vertex_ai.mdx b/docs/core_docs/docs/integrations/llms/google_vertex_ai.mdx deleted file mode 100644 index 51b34d36c9df..000000000000 --- a/docs/core_docs/docs/integrations/llms/google_vertex_ai.mdx +++ /dev/null @@ -1,84 +0,0 @@ -# Google Vertex AI - -:::caution -You are 
currently on a page documenting the use of Google Vertex models as [text completion models](/docs/concepts/#llms). Many popular models available on Google Vertex are [chat completion models](/docs/concepts/#chat-models). - -You may be looking for [this page instead](/docs/integrations/chat/google_vertex_ai/). -::: - -LangChain.js supports two different authentication methods based on whether -you're running in a Node.js environment or a web environment. - -## Setup - -### Node.js - -To call Vertex AI models in Node, you'll need to install the `@langchain/google-vertexai` package: - -import IntegrationInstallTooltip from "@mdx_components/integration_install_tooltip.mdx"; - - - -```bash npm2yarn -npm install @langchain/google-vertexai -``` - -You should make sure the Vertex AI API is -enabled for the relevant project and that you've authenticated to -Google Cloud using one of these methods: - -- You are logged into an account (using `gcloud auth application-default login`) - permitted to that project. -- You are running on a machine using a service account that is permitted - to the project. -- You have downloaded the credentials for a service account that is permitted - to the project and set the `GOOGLE_APPLICATION_CREDENTIALS` environment - variable to the path of this file. - **or** -- You set the `GOOGLE_API_KEY` environment variable to the API key for the project. 
- -### Web - -To call Vertex AI models in web environments (like Edge functions), you'll need to install -the `@langchain/google-vertexai-web` package: - -```bash npm2yarn -npm install @langchain/google-vertexai-web -``` - -Then, you'll need to add your service account credentials directly as a `GOOGLE_VERTEX_AI_WEB_CREDENTIALS` environment variable: - -``` -GOOGLE_VERTEX_AI_WEB_CREDENTIALS={"type":"service_account","project_id":"YOUR_PROJECT-12345",...} -``` - -You can also pass your credentials directly in code like this: - -```typescript -import { VertexAI } from "@langchain/google-vertexai"; -// Or uncomment this line if you're using the web version: -// import { VertexAI } from "@langchain/google-vertexai-web"; - -const model = new VertexAI({ - authOptions: { - credentials: {"type":"service_account","project_id":"YOUR_PROJECT-12345",...}, - }, -}); -``` - -## Usage - -The entire family of `gemini` models are available by specifying the `modelName` parameter. - -import CodeBlock from "@theme/CodeBlock"; -import VertexAILLMExample from "@examples/llms/googlevertexai.ts"; - -{VertexAILLMExample} - -### Streaming - -Streaming in multiple chunks is supported for faster responses: - -import VertexAILLMStreaming from "@examples/llms/googlevertexai-streaming.ts"; - -{VertexAILLMStreaming} diff --git a/docs/core_docs/docs/integrations/llms/mistral.ipynb b/docs/core_docs/docs/integrations/llms/mistral.ipynb index 6cd3bfec377a..1cadb282444e 100644 --- a/docs/core_docs/docs/integrations/llms/mistral.ipynb +++ b/docs/core_docs/docs/integrations/llms/mistral.ipynb @@ -1,18 +1,67 @@ { "cells": [ + { + "cell_type": "raw", + "id": "67db2992", + "metadata": {}, + "source": [ + "---\n", + "sidebar_label: MistralAI\n", + "---" + ] + }, { "cell_type": "markdown", + "id": "9597802c", "metadata": {}, "source": [ "# MistralAI\n", "\n", "```{=mdx}\n", + "\n", ":::tip\n", "Want to run Mistral's models locally? 
Check out our [Ollama integration](/docs/integrations/chat/ollama).\n", ":::\n", + "\n", + ":::caution\n", + "You are currently on a page documenting the use of Mistral models as [text completion models](/docs/concepts/#llms). Many popular models available on Mistral are [chat completion models](/docs/concepts/#chat-models).\n", + "\n", + "You may be looking for [this page instead](/docs/integrations/chat/mistral/).\n", + ":::\n", + "\n", "```\n", "\n", - "Here's how you can initialize an `MistralAI` LLM instance:\n", + "This will help you get started with MistralAI completion models (LLMs) using LangChain. For detailed documentation on `MistralAI` features and configuration options, please refer to the [API reference](https://api.js.langchain.com/classes/langchain_mistralai.MistralAI.html).\n", + "\n", + "## Overview\n", + "### Integration details\n", + "\n", + "| Class | Package | Local | Serializable | PY support | Package downloads | Package latest |\n", + "| :--- | :--- | :---: | :---: | :---: | :---: | :---: |\n", + "| [MistralAI](https://api.js.langchain.com/classes/langchain_mistralai.MistralAI.html) | [@langchain/mistralai](https://api.js.langchain.com/modules/langchain_mistralai.html) | ❌ | ✅ | ❌ | ![NPM - Downloads](https://img.shields.io/npm/dm/@langchain/mistralai?style=flat-square&label=%20&) | ![NPM - Version](https://img.shields.io/npm/v/@langchain/mistralai?style=flat-square&label=%20&) |\n", + "\n", + "## Setup\n", + "\n", + "To access MistralAI models you'll need to create a MistralAI account, get an API key, and install the `@langchain/mistralai` integration package.\n", + "\n", + "### Credentials\n", + "\n", + "Head to [console.mistral.ai](https://console.mistral.ai/) to sign up to MistralAI and generate an API key. 
Once you've done this set the `MISTRAL_API_KEY` environment variable:\n", + "\n", + "```bash\n", + "export MISTRAL_API_KEY=\"your-api-key\"\n", + "```\n", + "\n", + "If you want to get automated tracing of your model calls you can also set your [LangSmith](https://docs.smith.langchain.com/) API key by uncommenting below:\n", + "\n", + "```bash\n", + "# export LANGCHAIN_TRACING_V2=\"true\"\n", + "# export LANGCHAIN_API_KEY=\"your-api-key\"\n", + "```\n", + "\n", + "### Installation\n", + "\n", + "The LangChain MistralAI integration lives in the `@langchain/mistralai` package:\n", "\n", "```{=mdx}\n", "import IntegrationInstallTooltip from \"@mdx_components/integration_install_tooltip.mdx\";\n", @@ -23,12 +72,91 @@ "\n", " @langchain/mistralai\n", "\n", - "```\n" + "\n", + "```" + ] + }, + { + "cell_type": "markdown", + "id": "0a760037", + "metadata": {}, + "source": [ + "## Instantiation\n", + "\n", + "Now we can instantiate our model object and generate chat completions:" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "id": "a0562a13", + "metadata": {}, + "outputs": [], + "source": [ + "import { MistralAI } from \"@langchain/mistralai\"\n", + "\n", + "const llm = new MistralAI({\n", + " model: \"codestral-latest\",\n", + " temperature: 0,\n", + " maxTokens: undefined,\n", + " maxRetries: 2,\n", + " // other params...\n", + "})" + ] + }, + { + "cell_type": "markdown", + "id": "0ee90032", + "metadata": {}, + "source": [ + "## Invocation" ] }, { "cell_type": "code", "execution_count": 3, + "id": "035dea0f", + "metadata": { + "tags": [] + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + " has developed Mistral 7B, a large language model (LLM) that is open-source and available for commercial use. 
Mistral 7B is a 7 billion parameter model that is trained on a diverse and high-quality dataset, and it has been fine-tuned to perform well on a variety of tasks, including text generation, question answering, and code interpretation.\n", + "\n", + "MistralAI has made Mistral 7B available under a permissive license, allowing anyone to use the model for commercial purposes without having to pay any fees. This has made Mistral 7B a popular choice for businesses and organizations that want to leverage the power of large language models without incurring high costs.\n", + "\n", + "Mistral 7B has been trained on a diverse and high-quality dataset, which has enabled it to perform well on a variety of tasks. It has been fine-tuned to generate coherent and contextually relevant text, and it has been shown to be capable of answering complex questions and interpreting code.\n", + "\n", + "Mistral 7B is also a highly efficient model, capable of processing text at a fast pace. This makes it well-suited for applications that require real-time responses, such as chatbots and virtual assistants.\n", + "\n", + "Overall, Mistral 7B is a powerful and versatile large language model that is open-source and available for commercial use. 
Its ability to perform well on a variety of tasks, its efficiency, and its permissive license make it a popular choice for businesses and organizations that want to leverage the power of large language models.\n" + ] + } + ], + "source": [ + "const inputText = \"MistralAI is an AI company that \"\n", + "\n", + "const completion = await llm.invoke(inputText)\n", + "completion" + ] + }, + { + "cell_type": "markdown", + "id": "add38532", + "metadata": {}, + "source": [ + "## Chaining\n", + "\n", + "We can [chain](/docs/how_to/sequence/) our completion model with a prompt template like so:" + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "id": "078e9db2", "metadata": {}, "outputs": [ { @@ -36,28 +164,31 @@ "output_type": "stream", "text": [ "\n", - "console.log('hello world');\n", - "```\n", - "This will output 'hello world' to the console.\n" + "I love programming.\n", + "\n", + "Ich liebe Programmieren.\n", + "\n", + "In German, the phrase \"I love programming\" is translated as \"Ich liebe Programmieren.\" The word \"programming\" is translated to \"Programmieren,\" and \"I love\" is translated to \"Ich liebe.\"\n" ] } ], "source": [ - "import { MistralAI } from \"@langchain/mistralai\";\n", + "import { PromptTemplate } from \"@langchain/core/prompts\"\n", "\n", - "const model = new MistralAI({\n", - " model: \"codestral-latest\", // Defaults to \"codestral-latest\" if no model provided.\n", - " temperature: 0,\n", - " apiKey: \"YOUR-API-KEY\", // In Node.js defaults to process.env.MISTRAL_API_KEY\n", - "});\n", - "const res = await model.invoke(\n", - " \"You can print 'hello world' to the console in javascript like this:\\n```javascript\"\n", - ");\n", - "console.log(res);" + "const prompt = PromptTemplate.fromTemplate(\"How to say {input} in {output_language}:\\n\")\n", + "\n", + "const chain = prompt.pipe(llm);\n", + "await chain.invoke(\n", + " {\n", + " output_language: \"German\",\n", + " input: \"I love programming.\",\n", + " }\n", + ")" ] }, 
{ "cell_type": "markdown", + "id": "e99eef30", "metadata": {}, "source": [ "Since the Mistral LLM is a completions model, they also allow you to insert a `suffix` to the prompt. Suffixes can be passed via the call options when invoking a model like so:" @@ -65,7 +196,8 @@ }, { "cell_type": "code", - "execution_count": 4, + "execution_count": 5, + "id": "ec67551d", "metadata": {}, "outputs": [ { @@ -79,16 +211,17 @@ } ], "source": [ - "const res = await model.invoke(\n", + "const suffixResponse = await llm.invoke(\n", " \"You can print 'hello world' to the console in javascript like this:\\n```javascript\", {\n", " suffix: \"```\"\n", " }\n", ");\n", - "console.log(res);" + "console.log(suffixResponse);" ] }, { "cell_type": "markdown", + "id": "b9265343", "metadata": {}, "source": [ "As seen in the first example, the model generated the requested `console.log('hello world')` code snippet, but also included extra unwanted text. By adding a suffix, we can constrain the model to only complete the prompt up to the suffix (in this case, three backticks). This allows us to easily parse the completion and extract only the desired response without the suffix using a custom output parser." 
@@ -97,6 +230,7 @@ { "cell_type": "code", "execution_count": 1, + "id": "e2d34dc8", "metadata": {}, "outputs": [ { @@ -112,10 +246,9 @@ "source": [ "import { MistralAI } from \"@langchain/mistralai\";\n", "\n", - "const model = new MistralAI({\n", + "const llmForFillInCompletion = new MistralAI({\n", " model: \"codestral-latest\",\n", " temperature: 0,\n", - " apiKey: \"YOUR-API-KEY\",\n", "});\n", "\n", "const suffix = \"```\";\n", @@ -127,13 +260,23 @@ " throw new Error(\"Input does not contain suffix.\")\n", "};\n", "\n", - "const res = await model.invoke(\n", + "const resWithParser = await llmForFillInCompletion.invoke(\n", " \"You can print 'hello world' to the console in javascript like this:\\n```javascript\", {\n", " suffix,\n", " }\n", ");\n", "\n", - "console.log(customOutputParser(res));" + "console.log(customOutputParser(resWithParser));" + ] + }, + { + "cell_type": "markdown", + "id": "e9bdfcef", + "metadata": {}, + "source": [ + "## API reference\n", + "\n", + "For detailed documentation of all MistralAI features and configurations head to the API reference: https://api.js.langchain.com/classes/langchain_mistralai.MistralAI.html" ] } ], @@ -153,8 +296,13 @@ "mimetype": "text/typescript", "name": "typescript", "version": "3.7.2" + }, + "vscode": { + "interpreter": { + "hash": "e971737741ff4ec9aff7dc6155a1060a59a8a6d52c757dbbe66bf8ee389494b1" + } } }, "nbformat": 4, - "nbformat_minor": 2 + "nbformat_minor": 5 } diff --git a/docs/core_docs/docs/integrations/llms/mistralai.ipynb b/docs/core_docs/docs/integrations/llms/mistralai.ipynb new file mode 100644 index 000000000000..fb946dab71aa --- /dev/null +++ b/docs/core_docs/docs/integrations/llms/mistralai.ipynb @@ -0,0 +1,324 @@ +{ + "cells": [ + { + "cell_type": "raw", + "id": "67db2992", + "metadata": {}, + "source": [ + "---\n", + "sidebar_label: MistralAI\n", + "---" + ] + }, + { + "cell_type": "markdown", + "id": "9597802c", + "metadata": {}, + "source": [ + "# MistralAI\n", + "\n", + "```{=mdx}\n", + 
"\n", + ":::tip\n", + "Want to run Mistral's models locally? Check out our [Ollama integration](/docs/integrations/chat/ollama).\n", + ":::\n", + "\n", + ":::caution\n", + "You are currently on a page documenting the use of Mistral models as [text completion models](/docs/concepts/#llms). Many popular models available on Mistral are [chat completion models](/docs/concepts/#chat-models).\n", + "\n", + "You may be looking for [this page instead](/docs/integrations/chat/mistral/).\n", + ":::\n", + "\n", + "```\n", + "\n", + "This will help you get started with MistralAI completion models (LLMs) using LangChain. For detailed documentation on `MistralAI` features and configuration options, please refer to the [API reference](https://api.js.langchain.com/classes/langchain_mistralai.MistralAI.html).\n", + "\n", + "## Overview\n", + "### Integration details\n", + "\n", + "| Class | Package | Local | Serializable | PY support | Package downloads | Package latest |\n", + "| :--- | :--- | :---: | :---: | :---: | :---: | :---: |\n", + "| [MistralAI](https://api.js.langchain.com/classes/langchain_mistralai.MistralAI.html) | [@langchain/mistralai](https://api.js.langchain.com/modules/langchain_mistralai.html) | ❌ | ✅ | ❌ | ![NPM - Downloads](https://img.shields.io/npm/dm/@langchain/mistralai?style=flat-square&label=%20&) | ![NPM - Version](https://img.shields.io/npm/v/@langchain/mistralai?style=flat-square&label=%20&) |\n", + "\n", + "## Setup\n", + "\n", + "To access MistralAI models you'll need to create a MistralAI account, get an API key, and install the `@langchain/mistralai` integration package.\n", + "\n", + "### Credentials\n", + "\n", + "Head to [console.mistral.ai](https://console.mistral.ai/) to sign up to MistralAI and generate an API key. 
Once you've done this set the `MISTRAL_API_KEY` environment variable:\n", + "\n", + "```bash\n", + "export MISTRAL_API_KEY=\"your-api-key\"\n", + "```\n", + "\n", + "If you want to get automated tracing of your model calls you can also set your [LangSmith](https://docs.smith.langchain.com/) API key by uncommenting below:\n", + "\n", + "```bash\n", + "# export LANGCHAIN_TRACING_V2=\"true\"\n", + "# export LANGCHAIN_API_KEY=\"your-api-key\"\n", + "```\n", + "\n", + "### Installation\n", + "\n", + "The LangChain MistralAI integration lives in the `@langchain/mistralai` package:\n", + "\n", + "```{=mdx}\n", + "import IntegrationInstallTooltip from \"@mdx_components/integration_install_tooltip.mdx\";\n", + "import Npm2Yarn from \"@theme/Npm2Yarn\";\n", + "\n", + "\n", + "\n", + "\n", + " @langchain/mistralai\n", + "\n", + "\n", + "```" + ] + }, + { + "cell_type": "markdown", + "id": "0a760037", + "metadata": {}, + "source": [ + "## Instantiation\n", + "\n", + "Now we can instantiate our model object and generate chat completions:" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "id": "a0562a13", + "metadata": {}, + "outputs": [], + "source": [ + "import { MistralAI } from \"@langchain/mistralai\"\n", + "\n", + "const llm = new MistralAI({\n", + " model: \"codestral-latest\",\n", + " temperature: 0,\n", + " maxTokens: undefined,\n", + " maxRetries: 2,\n", + " // other params...\n", + "})" + ] + }, + { + "cell_type": "markdown", + "id": "0ee90032", + "metadata": {}, + "source": [ + "## Invocation" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "id": "035dea0f", + "metadata": { + "tags": [] + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + " has developed Mistral 7B, a large language model (LLM) that is open-source and available for commercial use. 
Mistral 7B is a 7 billion parameter model that is trained on a diverse and high-quality dataset, and it has been fine-tuned to perform well on a variety of tasks, including text generation, question answering, and code interpretation.\n", + "\n", + "MistralAI has made Mistral 7B available under a permissive license, allowing anyone to use the model for commercial purposes without having to pay any fees. This has made Mistral 7B a popular choice for businesses and organizations that want to leverage the power of large language models without incurring high costs.\n", + "\n", + "Mistral 7B has been trained on a diverse and high-quality dataset, which has enabled it to perform well on a variety of tasks. It has been fine-tuned to generate coherent and contextually relevant text, and it has been shown to be capable of answering complex questions and interpreting code.\n", + "\n", + "Mistral 7B is also a highly efficient model, capable of processing text at a fast pace. This makes it well-suited for applications that require real-time responses, such as chatbots and virtual assistants.\n", + "\n", + "Overall, Mistral 7B is a powerful and versatile large language model that is open-source and available for commercial use. 
Its ability to perform well on a variety of tasks, its efficiency, and its permissive license make it a popular choice for businesses and organizations that want to leverage the power of large language models.\n" + ] + } + ], + "source": [ + "const inputText = \"MistralAI is an AI company that \"\n", + "\n", + "const completion = await llm.invoke(inputText)\n", + "completion" + ] + }, + { + "cell_type": "markdown", + "id": "add38532", + "metadata": {}, + "source": [ + "## Chaining\n", + "\n", + "We can [chain](/docs/how_to/sequence/) our completion model with a prompt template like so:" + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "id": "078e9db2", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\n", + "I love programming.\n", + "\n", + "Ich liebe Programmieren.\n", + "\n", + "In German, the phrase \"I love programming\" is translated as \"Ich liebe Programmieren.\" The word \"programming\" is translated to \"Programmieren,\" and \"I love\" is translated to \"Ich liebe.\"\n" + ] + } + ], + "source": [ + "import { PromptTemplate } from \"@langchain/core/prompts\"\n", + "\n", + "const prompt = PromptTemplate.fromTemplate(\"How to say {input} in {output_language}:\\n\")\n", + "\n", + "const chain = prompt.pipe(llm);\n", + "await chain.invoke(\n", + " {\n", + " output_language: \"German\",\n", + " input: \"I love programming.\",\n", + " }\n", + ")" + ] + }, + { + "cell_type": "markdown", + "id": "e99eef30", + "metadata": {}, + "source": [ + "Since the Mistral LLM is a completions model, they also allow you to insert a `suffix` to the prompt. 
Suffixes can be passed via the call options when invoking a model like so:" + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "id": "ec67551d", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\n", + "console.log('hello world');\n", + "```\n" + ] + } + ], + "source": [ + "const suffixResponse = await llm.invoke(\n", + " \"You can print 'hello world' to the console in javascript like this:\\n```javascript\", {\n", + " suffix: \"```\"\n", + " }\n", + ");\n", + "console.log(suffixResponse);" + ] + }, + { + "cell_type": "markdown", + "id": "b9265343", + "metadata": {}, + "source": [ + "As seen in the first example, the model generated the requested `console.log('hello world')` code snippet, but also included extra unwanted text. By adding a suffix, we can constrain the model to only complete the prompt up to the suffix (in this case, three backticks). This allows us to easily parse the completion and extract only the desired response without the suffix using a custom output parser." + ] + }, + { + "cell_type": "code", + "execution_count": 8, + "id": "e2d34dc8", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\n", + "console.log('hello world');\n", + "\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "Request failed: HTTP error! status: 401 Response: \n", + "{\n", + " \"message\":\"Unauthorized\",\n", + " \"request_id\":\"ce8f413d3cbde9728a021e3aa2dd0301\"\n", + "}\n", + "Request failed: HTTP error! 
status: 401 Response: \n", + "{\n", + " \"message\":\"Unauthorized\",\n", + " \"request_id\":\"1f49393b12190949487b204e91001176\"\n", + "}\n" + ] + } + ], + "source": [ + "import { MistralAI } from \"@langchain/mistralai\";\n", + "\n", + "const llmForFillInCompletion = new MistralAI({\n", + " model: \"codestral-latest\",\n", + " temperature: 0,\n", + "});\n", + "\n", + "const suffix = \"```\";\n", + "\n", + "const customOutputParser = (input: string) => {\n", + " if (input.includes(suffix)) {\n", + " return input.split(suffix)[0];\n", + " }\n", + " throw new Error(\"Input does not contain suffix.\")\n", + "};\n", + "\n", + "const resWithParser = await llmForFillInCompletion.invoke(\n", + " \"You can print 'hello world' to the console in javascript like this:\\n```javascript\", {\n", + " suffix,\n", + " }\n", + ");\n", + "\n", + "console.log(customOutputParser(resWithParser));" + ] + }, + { + "cell_type": "markdown", + "id": "e9bdfcef", + "metadata": {}, + "source": [ + "## API reference\n", + "\n", + "For detailed documentation of all MistralAI features and configurations head to the API reference: https://api.js.langchain.com/classes/langchain_mistralai.MistralAI.html" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "TypeScript", + "language": "typescript", + "name": "tslab" + }, + "language_info": { + "codemirror_mode": { + "mode": "typescript", + "name": "javascript", + "typescript": true + }, + "file_extension": ".ts", + "mimetype": "text/typescript", + "name": "typescript", + "version": "3.7.2" + }, + "vscode": { + "interpreter": { + "hash": "e971737741ff4ec9aff7dc6155a1060a59a8a6d52c757dbbe66bf8ee389494b1" + } + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} diff --git a/docs/core_docs/docs/integrations/llms/openai.ipynb b/docs/core_docs/docs/integrations/llms/openai.ipynb new file mode 100644 index 000000000000..bd08a1a333e7 --- /dev/null +++ b/docs/core_docs/docs/integrations/llms/openai.ipynb @@ -0,0 +1,260 @@ +{ + "cells": [ + { + 
"cell_type": "raw", + "id": "67db2992", + "metadata": {}, + "source": [ + "---\n", + "sidebar_label: OpenAI\n", + "---" + ] + }, + { + "cell_type": "markdown", + "id": "9597802c", + "metadata": {}, + "source": [ + "# OpenAI\n", + "\n", + "```{=mdx}\n", + "\n", + ":::caution\n", + "You are currently on a page documenting the use of OpenAI [text completion models](/docs/concepts/#llms). The latest and most popular OpenAI models are [chat completion models](/docs/concepts/#chat-models).\n", + "\n", + "Unless you are specifically using `gpt-3.5-turbo-instruct`, you are probably looking for [this page instead](/docs/integrations/chat/openai/).\n", + ":::\n", + "\n", + "```\n", + "\n", + "This will help you get started with OpenAI completion models (LLMs) using LangChain. For detailed documentation on `OpenAI` features and configuration options, please refer to the [API reference](https://api.js.langchain.com/classes/langchain_openai.OpenAI.html).\n", + "\n", + "## Overview\n", + "### Integration details\n", + "\n", + "| Class | Package | Local | Serializable | [PY support](https://python.langchain.com/docs/integrations/llms/openai) | Package downloads | Package latest |\n", + "| :--- | :--- | :---: | :---: | :---: | :---: | :---: |\n", + "| [OpenAI](https://api.js.langchain.com/classes/langchain_openai.OpenAI.html) | [@langchain/openai](https://api.js.langchain.com/modules/langchain_openai.html) | ❌ | ✅ | ✅ | ![NPM - Downloads](https://img.shields.io/npm/dm/@langchain/openai?style=flat-square&label=%20&) | ![NPM - Version](https://img.shields.io/npm/v/@langchain/openai?style=flat-square&label=%20&) |\n", + "\n", + "## Setup\n", + "\n", + "To access OpenAI models you'll need to create an OpenAI account, get an API key, and install the `@langchain/openai` integration package.\n", + "\n", + "### Credentials\n", + "\n", + "Head to [platform.openai.com](https://platform.openai.com/) to sign up to OpenAI and generate an API key. 
Once you've done this set the `OPENAI_API_KEY` environment variable:\n", + "\n", + "```bash\n", + "export OPENAI_API_KEY=\"your-api-key\"\n", + "```\n", + "\n", + "If you want to get automated tracing of your model calls you can also set your [LangSmith](https://docs.smith.langchain.com/) API key by uncommenting below:\n", + "\n", + "```bash\n", + "# export LANGCHAIN_TRACING_V2=\"true\"\n", + "# export LANGCHAIN_API_KEY=\"your-api-key\"\n", + "```\n", + "\n", + "### Installation\n", + "\n", + "The LangChain OpenAI integration lives in the `@langchain/openai` package:\n", + "\n", + "```{=mdx}\n", + "import IntegrationInstallTooltip from \"@mdx_components/integration_install_tooltip.mdx\";\n", + "import Npm2Yarn from \"@theme/Npm2Yarn\";\n", + "\n", + "\n", + "\n", + "\n", + " @langchain/openai\n", + "\n", + "\n", + "```" + ] + }, + { + "cell_type": "markdown", + "id": "0a760037", + "metadata": {}, + "source": [ + "## Instantiation\n", + "\n", + "Now we can instantiate our model object and generate chat completions:" + ] + }, + { + "cell_type": "code", + "execution_count": 1, + "id": "a0562a13", + "metadata": {}, + "outputs": [], + "source": [ + "import { OpenAI } from \"@langchain/openai\"\n", + "\n", + "const llm = new OpenAI({\n", + " model: \"gpt-3.5-turbo-instruct\",\n", + " temperature: 0,\n", + " maxTokens: undefined,\n", + " timeout: undefined,\n", + " maxRetries: 2,\n", + " apiKey: process.env.OPENAI_API_KEY,\n", + " // other params...\n", + "})" + ] + }, + { + "cell_type": "markdown", + "id": "0ee90032", + "metadata": {}, + "source": [ + "## Invocation" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "id": "035dea0f", + "metadata": { + "tags": [] + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "develops and promotes friendly AI for the benefit of humanity. It was founded in 2015 by Elon Musk, Sam Altman, Greg Brockman, Ilya Sutskever, Wojciech Zaremba, John Schulman, and Chris Olah. 
The company's mission is to create and promote artificial general intelligence (AGI) that is safe and beneficial to humanity.\n", + "\n", + "OpenAI conducts research in various areas of AI, including deep learning, reinforcement learning, robotics, and natural language processing. The company also develops and releases open-source tools and platforms for AI research, such as the GPT-3 language model and the Gym toolkit for reinforcement learning.\n", + "\n", + "One of the main goals of OpenAI is to ensure that the development of AI is aligned with human values and does not pose a threat to humanity. To this end, the company has established a set of principles for safe and ethical AI development, and it actively collaborates with other organizations and researchers in the field.\n", + "\n", + "OpenAI has received funding from various sources, including tech giants like Microsoft and Amazon, as well as individual investors. It has also partnered with companies and organizations such as Google, IBM, and the United Nations to advance its research and promote responsible AI development.\n", + "\n", + "In addition to its research and development\n" + ] + } + ], + "source": [ + "const inputText = \"OpenAI is an AI company that \"\n", + "\n", + "const completion = await llm.invoke(inputText)\n", + "completion" + ] + }, + { + "cell_type": "markdown", + "id": "add38532", + "metadata": {}, + "source": [ + "## Chaining\n", + "\n", + "We can [chain](/docs/how_to/sequence/) our completion model with a prompt template like so:" + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "id": "078e9db2", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\n", + "Ich liebe Programmieren.\n" + ] + } + ], + "source": [ + "import { PromptTemplate } from \"@langchain/core/prompts\"\n", + "\n", + "const prompt = new PromptTemplate({\n", + " template: \"How to say {input} in {output_language}:\\n\",\n", + " inputVariables: [\"input\", 
\"output_language\"],\n", + "})\n", + "\n", + "const chain = prompt.pipe(llm);\n", + "await chain.invoke(\n", + " {\n", + " output_language: \"German\",\n", + " input: \"I love programming.\",\n", + " }\n", + ")" + ] + }, + { + "cell_type": "markdown", + "id": "e99eef30", + "metadata": {}, + "source": [ + "If you're part of an organization, you can set `process.env.OPENAI_ORGANIZATION` to your OpenAI organization id, or pass it in as `organization` when\n", + "initializing the model.\n", + "\n", + "## Custom URLs\n", + "\n", + "You can customize the base URL the SDK sends requests to by passing a `configuration` parameter like this:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "d958ab00", + "metadata": {}, + "outputs": [], + "source": [ + "const llmCustomURL = new OpenAI({\n", + " temperature: 0.9,\n", + " configuration: {\n", + " baseURL: \"https://your_custom_url.com\",\n", + " },\n", + "});" + ] + }, + { + "cell_type": "markdown", + "id": "81a5e2ea", + "metadata": {}, + "source": [ + "You can also pass other `ClientOptions` parameters accepted by the official SDK.\n", + "\n", + "If you are hosting on Azure OpenAI, see the [dedicated page instead](/docs/integrations/llms/azure).\n" + ] + }, + { + "cell_type": "markdown", + "id": "e9bdfcef", + "metadata": {}, + "source": [ + "## API reference\n", + "\n", + "For detailed documentation of all OpenAI features and configurations head to the API reference: https://api.js.langchain.com/classes/langchain_openai.OpenAI.html" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "TypeScript", + "language": "typescript", + "name": "tslab" + }, + "language_info": { + "codemirror_mode": { + "mode": "typescript", + "name": "javascript", + "typescript": true + }, + "file_extension": ".ts", + "mimetype": "text/typescript", + "name": "typescript", + "version": "3.7.2" + }, + "vscode": { + "interpreter": { + "hash": "e971737741ff4ec9aff7dc6155a1060a59a8a6d52c757dbbe66bf8ee389494b1" + } + } + 
}, + "nbformat": 4, + "nbformat_minor": 5 +} diff --git a/docs/core_docs/docs/integrations/llms/openai.mdx b/docs/core_docs/docs/integrations/llms/openai.mdx deleted file mode 100644 index e8ba5cd1e8a9..000000000000 --- a/docs/core_docs/docs/integrations/llms/openai.mdx +++ /dev/null @@ -1,55 +0,0 @@ -# OpenAI - -:::caution -You are currently on a page documenting the use of OpenAI [text completion models](/docs/concepts/#llms). The latest and most popular OpenAI models are [chat completion models](/docs/concepts/#chat-models). - -Unless you are specifically using `gpt-3.5-turbo-instruct`, you are probably looking for [this page instead](/docs/integrations/chat/openai/). -::: - -Here's how you can initialize an `OpenAI` LLM instance: - -import IntegrationInstallTooltip from "@mdx_components/integration_install_tooltip.mdx"; - - - -```bash npm2yarn -npm install @langchain/openai -``` - -import UnifiedModelParamsTooltip from "@mdx_components/unified_model_params_tooltip.mdx"; - - - -```typescript -import { OpenAI } from "@langchain/openai"; - -const model = new OpenAI({ - model: "gpt-3.5-turbo-instruct", // Defaults to "gpt-3.5-turbo-instruct" if no model provided. - temperature: 0.9, - apiKey: "YOUR-API-KEY", // In Node.js defaults to process.env.OPENAI_API_KEY -}); -const res = await model.invoke( - "What would be a good company name a company that makes colorful socks?" -); -console.log({ res }); -``` - -If you're part of an organization, you can set `process.env.OPENAI_ORGANIZATION` to your OpenAI organization id, or pass it in as `organization` when -initializing the model. - -## Custom URLs - -You can customize the base URL the SDK sends requests to by passing a `configuration` parameter like this: - -```typescript -const model = new OpenAI({ - temperature: 0.9, - configuration: { - baseURL: "https://your_custom_url.com", - }, -}); -``` - -You can also pass other `ClientOptions` parameters accepted by the official SDK. 
- -If you are hosting on Azure OpenAI, see the [dedicated page instead](/docs/integrations/llms/azure). diff --git a/docs/core_docs/docs/integrations/retrievers/bedrock-knowledge-bases.ipynb b/docs/core_docs/docs/integrations/retrievers/bedrock-knowledge-bases.ipynb new file mode 100644 index 000000000000..fbf57c6eb66a --- /dev/null +++ b/docs/core_docs/docs/integrations/retrievers/bedrock-knowledge-bases.ipynb @@ -0,0 +1,273 @@ +{ + "cells": [ + { + "cell_type": "raw", + "id": "afaf8039", + "metadata": { + "vscode": { + "languageId": "raw" + } + }, + "source": [ + "---\n", + "sidebar_label: Knowledge Bases for Amazon Bedrock\n", + "---" + ] + }, + { + "cell_type": "markdown", + "id": "e49f1e0d", + "metadata": {}, + "source": [ + "# Knowledge Bases for Amazon Bedrock\n", + "\n", + "## Overview\n", + "\n", + "This will help you get started with the [AmazonKnowledgeBaseRetriever](/docs/concepts/#retrievers). For detailed documentation of all AmazonKnowledgeBaseRetriever features and configurations head to the [API reference](https://api.js.langchain.com/classes/langchain_aws.AmazonKnowledgeBaseRetriever.html).\n", + "\n", + "Knowledge Bases for Amazon Bedrock provides fully managed support for an end-to-end RAG workflow, offered by Amazon Web Services (AWS).\n", + "It provides an entire ingestion workflow of converting your documents into embeddings (vector) and storing the embeddings in a specialized vector database.\n", + "Knowledge Bases for Amazon Bedrock supports popular databases for vector storage, including vector engine for Amazon OpenSearch Serverless, Pinecone, Redis Enterprise Cloud, Amazon Aurora (coming soon), and MongoDB (coming soon).\n", + "\n", + "### Integration details\n", + "\n", + "| Retriever | Self-host | Cloud offering | Package | [Py support](https://python.langchain.com/docs/integrations/retrievers/bedrock/) |\n", + "| :--- | :--- | :---: | :---: | :---: |\n", + 
"[AmazonKnowledgeBaseRetriever](https://api.js.langchain.com/classes/langchain_aws.AmazonKnowledgeBaseRetriever.html) | 🟠 (see details below) | ✅ | @langchain/aws | ✅ |\n", + "\n", + "> AWS Knowledge Base Retriever can be 'self hosted' in the sense that you can run it on your own AWS infrastructure. However, it is not possible to run on another cloud provider or on-premises.\n", + "\n", + "## Setup\n", + "\n", + "In order to use the AmazonKnowledgeBaseRetriever, you need to have an AWS account, where you can manage your indexes and documents. Once you've set up your account, set the following environment variables:\n", + "\n", + "```bash\n", + "process.env.AWS_KNOWLEDGE_BASE_ID=your-knowledge-base-id\n", + "process.env.AWS_ACCESS_KEY_ID=your-access-key-id\n", + "process.env.AWS_SECRET_ACCESS_KEY=your-secret-access-key\n", + "```" + ] + }, + { + "cell_type": "markdown", + "id": "72ee0c4b-9764-423a-9dbf-95129e185210", + "metadata": {}, + "source": [ + "If you want to get automated tracing from individual queries, you can also set your [LangSmith](https://docs.smith.langchain.com/) API key by uncommenting below:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "a15d341e-3e26-4ca3-830b-5aab30ed66de", + "metadata": {}, + "outputs": [], + "source": [ + "// process.env.LANGSMITH_API_KEY = \"\";\n", + "// process.env.LANGSMITH_TRACING = \"true\";" + ] + }, + { + "cell_type": "markdown", + "id": "0730d6a1-c893-4840-9817-5e5251676d5d", + "metadata": {}, + "source": [ + "### Installation\n", + "\n", + "This retriever lives in the `@langchain/aws` package:\n", + "\n", + "```{=mdx}\n", + "import IntegrationInstallTooltip from \"@mdx_components/integration_install_tooltip.mdx\";\n", + "import Npm2Yarn from \"@theme/Npm2Yarn\";\n", + "\n", + "\n", + "\n", + "\n", + " @langchain/aws\n", + "\n", + "```" + ] + }, + { + "cell_type": "markdown", + "id": "a38cde65-254d-4219-a441-068766c0d4b5", + "metadata": {}, + "source": [ + "## Instantiation\n", + "\n", + "Now we can 
instantiate our retriever:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "70cc8e65-2a02-408a-bbc6-8ef649057d82", + "metadata": {}, + "outputs": [], + "source": [ + "import { AmazonKnowledgeBaseRetriever } from \"@langchain/aws\";\n", + "\n", + "const retriever = new AmazonKnowledgeBaseRetriever({\n", + " topK: 10,\n", + " knowledgeBaseId: process.env.AWS_KNOWLEDGE_BASE_ID,\n", + " region: \"us-east-2\",\n", + " clientOptions: {\n", + " credentials: {\n", + " accessKeyId: process.env.AWS_ACCESS_KEY_ID,\n", + " secretAccessKey: process.env.AWS_SECRET_ACCESS_KEY,\n", + " },\n", + " },\n", + "});" + ] + }, + { + "cell_type": "markdown", + "id": "5c5f2839-4020-424e-9fc9-07777eede442", + "metadata": {}, + "source": [ + "## Usage" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "51a60dbe-9f2e-4e04-bb62-23968f17164a", + "metadata": {}, + "outputs": [], + "source": [ + "const query = \"...\"\n", + "\n", + "await retriever.invoke(query);" + ] + }, + { + "cell_type": "markdown", + "id": "dfe8aad4-8626-4330-98a9-7ea1ca5d2e0e", + "metadata": {}, + "source": [ + "## Use within a chain\n", + "\n", + "Like other retrievers, AmazonKnowledgeBaseRetriever can be incorporated into LLM applications via [chains](/docs/how_to/sequence/).\n", + "\n", + "We will need a LLM or chat model:\n", + "\n", + "```{=mdx}\n", + "import ChatModelTabs from \"@theme/ChatModelTabs\";\n", + "\n", + "\n", + "```" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "25b647a3-f8f2-4541-a289-7a241e43f9df", + "metadata": {}, + "outputs": [], + "source": [ + "// @ls-docs-hide-cell\n", + "\n", + "import { ChatOpenAI } from \"@langchain/openai\";\n", + "\n", + "const llm = new ChatOpenAI({\n", + " model: \"gpt-4o-mini\",\n", + " temperature: 0,\n", + "});" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "23e11cc9-abd6-4855-a7eb-799f45ca01ae", + "metadata": {}, + "outputs": [], + "source": [ + "import { ChatPromptTemplate } 
from \"@langchain/core/prompts\";\n", + "import { RunnablePassthrough, RunnableSequence } from \"@langchain/core/runnables\";\n", + "import { StringOutputParser } from \"@langchain/core/output_parsers\";\n", + "\n", + "import type { Document } from \"@langchain/core/documents\";\n", + "\n", + "const prompt = ChatPromptTemplate.fromTemplate(`\n", + "Answer the question based only on the context provided.\n", + "\n", + "Context: {context}\n", + "\n", + "Question: {question}`);\n", + "\n", + "const formatDocs = (docs: Document[]) => {\n", + " return docs.map((doc) => doc.pageContent).join(\"\\n\\n\");\n", + "}\n", + "\n", + "// See https://js.langchain.com/v0.2/docs/tutorials/rag\n", + "const ragChain = RunnableSequence.from([\n", + " {\n", + " context: retriever.pipe(formatDocs),\n", + " question: new RunnablePassthrough(),\n", + " },\n", + " prompt,\n", + " llm,\n", + " new StringOutputParser(),\n", + "]);" + ] + }, + { + "cell_type": "markdown", + "id": "22b1d6f8", + "metadata": {}, + "source": [ + "```{=mdx}\n", + "\n", + ":::tip\n", + "\n", + "See [our RAG tutorial](/docs/tutorials/rag) for more information and examples on `RunnableSequence`s like the one above.\n", + "\n", + ":::\n", + "\n", + "```" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "d47c37dd-5c11-416c-a3b6-bec413cd70e8", + "metadata": {}, + "outputs": [], + "source": [ + "await ragChain.invoke(\"...\")" + ] + }, + { + "cell_type": "markdown", + "id": "3a5bb5ca-c3ae-4a58-be67-2cd18574b9a3", + "metadata": {}, + "source": [ + "## API reference\n", + "\n", + "For detailed documentation of all AmazonKnowledgeBaseRetriever features and configurations head to the [API reference](https://api.js.langchain.com/classes/langchain_aws.AmazonKnowledgeBaseRetriever.html)." 
+ ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "TypeScript", + "language": "typescript", + "name": "tslab" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "typescript", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.10.4" + } + }, + "nbformat": 4, + "nbformat_minor": 5 + } + \ No newline at end of file diff --git a/docs/core_docs/docs/integrations/retrievers/bedrock-knowledge-bases.mdx b/docs/core_docs/docs/integrations/retrievers/bedrock-knowledge-bases.mdx deleted file mode 100644 index 01cb48c6e1af..000000000000 --- a/docs/core_docs/docs/integrations/retrievers/bedrock-knowledge-bases.mdx +++ /dev/null @@ -1,26 +0,0 @@ ---- -hide_table_of_contents: true ---- - -# Knowledge Bases for Amazon Bedrock - -Knowledge Bases for Amazon Bedrock is a fully managed support for end-to-end RAG workflow provided by Amazon Web Services (AWS). -It provides an entire ingestion workflow of converting your documents into embeddings (vector) and storing the embeddings in a specialized vector database. -Knowledge Bases for Amazon Bedrock supports popular databases for vector storage, including vector engine for Amazon OpenSearch Serverless, Pinecone, Redis Enterprise Cloud, Amazon Aurora (coming soon), and MongoDB (coming soon). 
- -## Setup - -import IntegrationInstallTooltip from "@mdx_components/integration_install_tooltip.mdx"; - - - -```bash npm2yarn -npm i @langchain/aws -``` - -## Usage - -import CodeBlock from "@theme/CodeBlock"; -import Example from "@examples/retrievers/amazon_knowledge_bases.ts"; - -{Example} diff --git a/docs/core_docs/docs/integrations/retrievers/exa.ipynb b/docs/core_docs/docs/integrations/retrievers/exa.ipynb new file mode 100644 index 000000000000..ea1d9a8ddc37 --- /dev/null +++ b/docs/core_docs/docs/integrations/retrievers/exa.ipynb @@ -0,0 +1,353 @@ +{ + "cells": [ + { + "cell_type": "raw", + "id": "afaf8039", + "metadata": { + "vscode": { + "languageId": "raw" + } + }, + "source": [ + "---\n", + "sidebar_label: Exa\n", + "---" + ] + }, + { + "cell_type": "markdown", + "id": "e49f1e0d", + "metadata": {}, + "source": [ + "# ExaRetriever\n", + "\n", + "## Overview\n", + "\n", + "This will help you get started with the [ExaRetriever](/docs/concepts/#retrievers). For detailed documentation of all ExaRetriever features and configurations head to the [API reference](https://api.js.langchain.com/classes/langchain_exa.ExaRetriever.html).\n", + "\n", + "### Integration details\n", + "\n", + "| Retriever | Source | Package |\n", + "| :--- | :--- | :---: |\n", + "[ExaRetriever](https://api.js.langchain.com/classes/langchain_exa.ExaRetriever.html) | Information on the web. 
| @langchain/exa |\n", + "\n", + "## Setup\n", + "\n", + "You'll need to set your API key as an environment variable.\n", + "\n", + "The `Exa` class defaults to `EXASEARCH_API_KEY` when searching for your API key.\n", + "\n", + "```typescript\n", + "process.env.EXASEARCH_API_KEY=\"\";\n", + "```\n", + "\n", + "If you want to get automated tracing from individual queries, you can also set your [LangSmith](https://docs.smith.langchain.com/) API key by uncommenting below:\n", + "\n", + "```typescript\n", + "// process.env.LANGSMITH_API_KEY = \"\";\n", + "// process.env.LANGSMITH_TRACING = \"true\";\n", + "```\n", + "\n", + "### Installation\n", + "\n", + "This retriever lives in the `@langchain/exa` package:\n", + "\n", + "```{=mdx}\n", + "import IntegrationInstallTooltip from \"@mdx_components/integration_install_tooltip.mdx\";\n", + "import Npm2Yarn from \"@theme/Npm2Yarn\";\n", + "\n", + "\n", + "\n", + "\n", + " @langchain/exa\n", + "\n", + "```" + ] + }, + { + "cell_type": "markdown", + "id": "a38cde65-254d-4219-a441-068766c0d4b5", + "metadata": {}, + "source": [ + "## Instantiation\n", + "\n", + "Now we can instantiate our retriever:" + ] + }, + { + "cell_type": "code", + "execution_count": 8, + "id": "70cc8e65-2a02-408a-bbc6-8ef649057d82", + "metadata": {}, + "outputs": [], + "source": [ + "import { ExaRetriever } from \"@langchain/exa\";\n", + "import Exa from \"exa-js\";\n", + "\n", + "const retriever = new ExaRetriever({\n", + " // @lc-ts-ignore\n", + " client: new Exa(\n", + " process.env.EXASEARCH_API_KEY // default API key\n", + " ),\n", + " searchArgs: {\n", + " numResults: 2,\n", + " }\n", + "});" + ] + }, + { + "cell_type": "markdown", + "id": "5c5f2839-4020-424e-9fc9-07777eede442", + "metadata": {}, + "source": [ + "## Usage" + ] + }, + { + "cell_type": "code", + "execution_count": 9, + "id": "51a60dbe-9f2e-4e04-bb62-23968f17164a", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "[\n", + " Document 
{\n", + " pageContent: 'President Biden’s State of the Union Address\\n' +\n", + " 'Madam Speaker, Madam Vice President, and our First Lady and Second Gentleman, members of Congress and the Cabinet, Justices of the Supreme Court, my fellow Americans: Last year, COVID-19 kept us apart. This year, we’re finally together again.\\n' +\n", + " 'Tonight — tonight we meet as Democrats, Republicans, and independents, but, most importantly, as Americans with a duty to one another, to America, to the American people, and to the Constitution, and an unwavering resolve that freedom will always triumph over tyranny.\\n' +\n", + " 'Six — thank you. Six days ago, Russia’s Vladimir Putin sought to shake the very foundations of the free world, thinking he could make it bend to his menacing ways. But he badly miscalculated. He thought he could roll into Ukraine and the world would roll over. Instead, he met with a wall of strength he never anticipated or imagined. He met the Ukrainian people.\\n' +\n", + " 'From President Zelenskyy to every Ukrainian, their fearlessness, their courage, their determination literally inspires the world. Groups of citizens blocking tanks with their bodies. Everyone from students to retirees, to teachers turned soldiers defending their homeland.\\n' +\n", + " 'And in this struggle — President Zelenskyy said in his speech to the European Parliament, “Light will win over darkness.”\\n' +\n", + " 'The Ukrainian Ambassador to the United States is here tonight sitting with the First Lady. Let each of us, if you’re able to stand, stand and send an unmistakable signal to the world and Ukraine. Thank you. Thank you, thank you, thank you.\\n' +\n", + " 'She’s bright, she’s strong, and she’s resolved.\\n' +\n", + " 'Yes. 
We, the United States of America, stand with the Ukrainian people.\\n' +\n", + " 'Throughout our history, we’ve learned this lesson: When dictators do not pay a price for their aggression, they cause more chaos; they keep moving; and the costs, the threats to the America — and America, to the world keeps rising.\\n' +\n", + " 'That’s why the NATO Alliance was created: to secure peace and stability in Europe after World War Two.\\n' +\n", + " 'The United States is a member, along with 29 other nations. It matters. American diplomacy matters. American resolve matters.\\n' +\n", + " 'Putin’s latest attack on Ukraine was premeditated and totally unprovoked. He rejected repeated efforts at diplomacy.\\n' +\n", + " 'He thought the West and NATO wouldn’t respond. He thought he could divide us at home, in this chamber, in this nation. He thought he could divide us in Europe as well.\\n' +\n", + " 'But Putin was wrong. We are ready. We are united. And that’s what we did: We stayed united.\\n' +\n", + " 'We prepared extensively and carefully. We spent months building coalitions of other freedom-loving nations in Europe and the Americas to — from America to the Asian and African continents to confront Putin.\\n' +\n", + " 'Like many of you, I spent countless hours unifying our European Allies.\\n' +\n", + " 'We shared with the world, in advance, what we knew Putin was planning and precisely how he would try to falsely and justify his aggression.\\n' +\n", + " 'We countered Russia’s lies with the truth. And now — now that he’s acted, the free world is holding him accountable, along with 27 members of the European Union — including France, Germany, Italy — as well as countries like the United Kingdom, Canada, Japan, Korea, Australia, New Zealand, and many others. Even Switzerland are inflicting pain on Russia and supporting the people of Ukraine.\\n' +\n", + " 'Putin is now isolated from the world more than he has ever been.\\n' +\n", + " 'Together. Together. 
Together, along with our Allies, we are right now enforcing powerful economic sanctions. We’re cutting off Russia’s largest banks from the international financial system; preventing Russia’s Central Bank from defending the Russian ruble, making Putin’s $630 billion war fund worthless. We’re choking Russia’s access, we’re choking Russia’s access to technology that will sap its economic strength and weaken its military for years to come.\\n' +\n", + " 'Tonight, I say to the Russian oligarchs and the corrupt leaders who’ve bilked billions of dollars off this violent regime: No more.\\n' +\n", + " 'The United States — I mean it. The United States Department of Justice is assembling a dedicated task force to go after the crimes of the Russian oligarchs.\\n' +\n", + " 'We’re joining with European Allies to find and seize their yachts, their luxury apartments, their private jets. We’re coming for your ill-begotten gains.\\n' +\n", + " 'And, tonight, I’m announcing that we will join our Allies in closing off American air space to all Russian flights, further isolating Russia and adding an additional squeeze on their economy.\\n' +\n", + " 'He has no idea what’s coming.\\n' +\n", + " 'The ruble has already lost 30 percent of its value, the Russian stock market has lost 40 percent of its value, and trading remains suspended.\\n' +\n", + " 'The Russian economy is reeling, and Putin alone is the one to blame.\\n' +\n", + " 'Together with our Allies, we’re providing support to the Ukrainians in their fight for freedom: military assistance, economic assistance, humanitarian assistance. We’re giving more than a billion dollars in direct assistance to Ukraine. And we’ll continue to aid the Ukrainian people as they defend their country and help ease their suffering.\\n' +\n", + " 'But let me be clear: Our forces are not engaged and will not engage in the conflict with Russian forces in Ukraine. 
Our forces are not going to Europe to fight in Ukraine but to defend our NATO Allies in the event that Putin decides to keep moving west.\\n' +\n", + " 'For that purpose, we have mobilized American ground forces, air squadrons, ship deployments to protect NATO countries, including Poland, Romania, Latvia, Lithuania, and Estonia.\\n' +\n", + " 'And as I’ve made crystal clear, the United States and our Allies will defend every inch of territory that is NATO territory with the full force of our collective power — every single inch.\\n' +\n", + " 'And we’re clear-eyed. The Ukrainians are fighting back with pure courage. But the next few days, weeks, and months will be hard on them.\\n' +\n", + " 'Putin has unleashed violence and chaos. But while he may make gains on the battlefield, he will pay a continuing high price over the long run.\\n' +\n", + " 'And a pound of Ukrainian people — the proud, proud people — pound for pound, ready to fight with every inch of (inaudible) they have. They’ve known 30 years of independence — have repeatedly shown that they will not tolerate anyone who tries to take their country backwards.\\n' +\n", + " 'To all Americans, I’ll be honest with you, as I’ve always promised I would be. A Russian dictator infa- — invading a foreign country has costs around the world. And I’m taking robust action to make sure the pain of our sanctions is targeted at the Russian economy and that we use every tool at our disposal to protect American businesses and consumers.\\n' +\n", + " 'Tonight, I can announce the United States has worked with 30 other countries to release 60 million barrels of oil from reserves around the world. America will lead that effort, releasing 30 million barrels of our own Strategic Petroleum Reserve. And we stand ready to do more if necessary, united with our Allies.\\n' +\n", + " 'These steps will help blunt gas prices here at home. But I know news about what’s happening can seem alarming to all Americans. 
But I want you to know: We’re going to be okay. We’re going to be okay.\\n' +\n", + " 'When the history of this era is written, Putin’s war on Ukraine will have left Russia weaker and the rest of the world stronger.\\n' +\n", + " 'While it shouldn’t and while it shouldn’t have taken something so terrible for people around the world to see what’s at stake, now everyone sees it clearly.\\n' +\n", + " 'We see the unity among leaders of nations, a more unified Europe, a more unified West.\\n' +\n", + " 'We see unity among the people who are gathering in cities in large crowds around the world, even in Russia, to demonstrate their support for the people of Ukraine.\\n' +\n", + " 'In the battle between democracy and autocracies, democracies are rising to the moment and the world is clearly choosing the side of peace and security.\\n' +\n", + " 'This is the real test, and it’s going to take time. So, let us continue to draw inspiration from the iron will of the Ukrainian people.\\n' +\n", + " 'To our fellow Ukrainian Americans who forged a deep bond that connects our two nations: We stand with you. We stand with you.\\n' +\n", + " 'Putin may circle Kyiv with tanks, but he’ll never gain the hearts and souls of Ukrainian people. He’ll never — he’ll never extinguish their love of freedom. And he will never, never weaken the resolve of the free world.\\n' +\n", + " 'We meet tonight in an America that has lived through two of the hardest years this nation has ever faced. The pandemic has been punishing. And so many families are living paycheck to paycheck, struggling to keep up with the rising cost of food, gas, housing, and so much more.\\n' +\n", + " 'I understand, like many of you did. My dad had to leave his home in Scranton, Pennsylvania, to find work. 
So, like many of you, I grew up in a family when the price of food went up, it was felt throughout the family; it had an impact.\\n' +\n", + " 'That’s why one of the first things I did as President was fight to pass the American Rescue Plan, because people were hurting. We needed to act and we did.\\n' +\n", + " 'American Rescue Plan \\n' +\n", + " 'Few pieces of legislation have done more at a critical moment in our history to lift us out of a crisis. It fueled our efforts to vaccinate the nation and combat COVID-19. It delivered immediate economic relief to tens of millions of Americans. It helped put food on the table. Remember those long lines of cars waiting for hours just to get a box of food put in their trunk? It cut the cost of healthcare insurance. And as my dad used to say, it gave the people “just a little bit of breathing room.”\\n' +\n", + " 'And unlike the $2 trillion tax cut passed in the previous administration that benefitted the top 1 percent of Americans, the American Rescue Plan helped working people and left no one behind. And, folks — and it worked. It worked.\\n' +\n", + " 'It worked and created jobs — lots of jobs. In fact, our economy created over 6.5 million new jobs just last year, more jobs in one year than ever before in the history of the United States of America.\\n' +\n", + " 'Economic Progress Report \\n' +\n", + " 'The economy grew at a rate of 5.7 last year — the strongest growth'... 35166 more characters,\n", + " metadata: {\n", + " score: 0.16303963959217072,\n", + " title: '2022 State of the Union Address | The White House',\n", + " id: 'https://www.whitehouse.gov/state-of-the-union-2022/',\n", + " url: 'https://www.whitehouse.gov/state-of-the-union-2022/',\n", + " publishedDate: '2022-02-25',\n", + " author: ''\n", + " },\n", + " id: undefined\n", + " },\n", + " Document {\n", + " pageContent: \"The President. Thank you all very, very much. Thank you, please. Thank you so much. 
Madam Speaker, Madam Vice President, and our First Lady and Second Gentleman, Members of Congress and the Cabinet, Justices of the Supreme Court, my fellow Americans: Last year, COVID-19 kept us apart. This year, we're finally together again.\\n\" +\n", + " 'Tonight we meet as Democrats, Republicans, and Independents, but most importantly, as Americans with a duty to one another, to America, to the American people, and to the Constitution, and an unwavering resolve that freedom will always triumph over tyranny.\\n' +\n", + " \"Six—[applause]—thank you. Six days ago, Russia's Vladimir Putin sought to shake the very foundations of the free world, thinking he could make it bend to his menacing ways. But he badly miscalculated. He thought he could roll into Ukraine and the world would roll over. Instead, he met with a wall of strength he never anticipated or imagined. He met the Ukrainian people.\\n\" +\n", + " 'From President Zelenskiy, their—to every Ukrainian, their fearlessness, their courage, their determination literally inspires the world. Groups of citizens blocking tanks with their bodies. Everyone from students to retirees, to teachers turned soldiers defending their homeland. And in this struggle—President Zelenskiy said in his speech to the European Parliament, \"Light will win over darkness.\"\\n' +\n", + " \"The Ukrainian Ambassador to the United States is here tonight sitting with the First Lady. Let each of us, if you're able to stand, stand and send an unmistakable signal to the world and Ukraine. Thank you. Thank you, thank you, thank you. She's bright, she's strong, and she's resolved. Yes. We, the United States of America, stand with the Ukrainian people.\\n\" +\n", + " \"Throughout our history, we've learned this lesson: When dictators do not pay a price for their aggression, they cause more chaos; they keep moving; and the costs, the threats to the America—and America, to the world keeps rising. 
That's why the NATO alliance was created: to secure peace and stability in Europe after World War II. The United States is a member, along with 29 other nations. It matters. American diplomacy matters. American resolve matters.\\n\" +\n", + " \"Putin's latest attack on Ukraine was premeditated and totally unprovoked. He rejected repeated—repeated—efforts at diplomacy. He thought the West and NATO wouldn't respond. He thought he could divide us at home, in this Chamber, in this Nation. He thought he could divide us in Europe as well.\\n\" +\n", + " \"But Putin was wrong. We are ready. We are united. And that's what we did: We stayed united. We prepared extensively and carefully. We spent months building coalitions of other freedom-loving nations in Europe and the Americas to—from America to the Asian and African continents to confront Putin.\\n\" +\n", + " \"Like many of you, I spent countless hours unifying our European allies. We shared with the world in advance what we knew Putin was planning and precisely how he would try to falsify and justify his aggression. We countered Russia's lies with the truth. And now—now that he's acted, the free world is holding him accountable, along with 27 members of the European Union—including France, Germany, Italy—as well as countries like the United Kingdom, Canada, Japan, Korea, Australia, New Zealand, and many others—even Switzerland—are inflicting pain on Russia and supporting the people of Ukraine. Putin is now isolated from the world more than he has ever been.\\n\" +\n", + " \"Together, along with our allies, we are right now enforcing powerful economic sanctions. We're cutting off Russia's largest banks from the international financial system; preventing Russia's Central Bank from defending the Russian ruble, making Putin's $630 billion war fund worthless. 
We're choking Russia's access to technology that will sap its economic strength and weaken its military for years to come.\\n\" +\n", + " 'Tonight I say to the Russian oligarchs and the corrupt leaders who have bilked billions of dollars off this violent regime: No more. The United States—[applause]—I mean it. The United States Department of Justice is assembling a dedicated task force to go after the crimes of the Russian oligarchs.\\n' +\n", + " \"We're joining with European allies to find and seize their yachts, their luxury apartments, their private jets. We're coming for your ill-begotten gains. And tonight I'm announcing that we will join our allies in closing off American air space to all Russian flights, further isolating Russia and adding an additional squeeze on their economy.\\n\" +\n", + " \"He has no idea what's coming. The ruble has already lost 30 percent of its value, the Russian stock market has lost 40 percent of its value, and trading remains suspended. The Russian economy is reeling, and Putin alone is the one to blame.\\n\" +\n", + " \"Together with our allies, we're providing support to the Ukrainians in their fight for freedom: military assistance, economic assistance, humanitarian assistance. We're giving more than a billion dollars in direct assistance to Ukraine. And we'll continue to aid the Ukrainian people as they defend their country and help ease their suffering.\\n\" +\n", + " \"But let me be clear: Our Forces are not engaged and will not engage in the conflict with Russian forces in Ukraine. Our Forces are not going to Europe to fight [in]* Ukraine but to defend our NATO allies in the event that Putin decides to keep moving west. For that purpose, we have mobilized American ground forces, air squadrons, ship deployments to protect NATO countries, including Poland, Romania, Latvia, Lithuania, and Estonia. 
And as I've made crystal clear, the United States and our allies will defend every inch of territory that is NATO territory with the full force of our collective power—every single inch.\\n\" +\n", + " \"And we're clear eyed. The Ukrainians are fighting back with pure courage. But the next few days, weeks, and months will be hard on them. Putin has unleashed violence and chaos. But while he may make gains on the battlefield, he'll pay a continuing high price over the long run. And a pound of Ukrainian people—the proud, proud people—pound for pound, ready to fight with every inch of energy they have. They've known 30 years of independence—have repeatedly shown that they will not tolerate anyone who tries to take their country backwards.\\n\" +\n", + " \"To all Americans, I'll be honest with you, as I've always promised I would be. A Russian dictator invading a foreign country has costs around the world. And I'm taking robust action to make sure the pain of our sanctions is targeted at Russian economy and that we use every tool at our disposal to protect American businesses and consumers.\\n\" +\n", + " 'Tonight I can announce the United States has worked with 30 other countries to release 60 million barrels of oil from reserves around the world. America will lead that effort, releasing 30 million barrels of our own Strategic Petroleum Reserve. And we stand ready to do more if necessary, united with our allies.\\n' +\n", + " \"These steps will help blunt gas prices here at home. But I know news about what's happening can seem alarming to all Americans. But I want you to know: We're going to be okay. We're going to be okay.\\n\" +\n", + " \"When the history of this era is written, Putin's war on Ukraine will have left Russia weaker and the rest of the world stronger.\\n\" +\n", + " \"While it shouldn't have taken something so terrible for people around the world to see what's at stake, now everyone sees it clearly. 
We see the unity among leaders of nations, a more unified Europe, a more unified West. We see unity among the people who are gathering in cities in large crowds around the world, even in Russia, to demonstrate their support for the people of Ukraine.\\n\" +\n", + " \"In the battle between democracy and autocracies, democracies are rising to the moment, and the world is clearly choosing the side of peace and security. This is the real test, and it's going to take time. So let us continue to draw inspiration from the iron will of the Ukrainian people.\\n\" +\n", + " \"To our fellow Ukrainian Americans who forged a deep bond that connects our two nations: We stand with you. We stand with you. Putin may circle Kiev with tanks, but he'll never gain the hearts and souls of the Uranian [Ukrainian]* people. He'll never extinguish their love of freedom. And he will never, never weaken the resolve of the free world.\\n\" +\n", + " 'We meet tonight in an America that has lived through 2 of the hardest years this Nation has ever faced. The pandemic has been punishing. And so many families are living paycheck to paycheck, struggling to keep up with the rising cost of food, gas, housing, and so much more.\\n' +\n", + " \"I understand, like many of you did. My dad had to leave his home in Scranton, Pennsylvania, to find work. So, like many of you, I grew up in a family when the price of food went up, it was felt throughout the family; it had an impact. That's why one of the first things I did as President was fight to pass the American Rescue Plan, because people were hurting. We needed to act, and we did.\\n\" +\n", + " 'Few pieces of legislation have done more at a critical moment in our history to lift us out of a crisis. It fueled our efforts to vaccinate the Nation and combat COVID-19. It delivered immediate economic relief to tens of millions of Americans. It helped put food on the table. 
Remember those long lines of cars waiting for hours just to get a box of food put in their trunk? It cut the cost of health care insurance. And as my dad used to say, it gave the people \"just a little bit of breathing room.\"\\n' +\n", + " 'And unlike the $2 trillion tax cut passed in the previous administration that benefited the top 1 percent of Americans, the American Rescue Plan——\\n' +\n", + " ' Audience members. Boo!\\n' +\n", + " ' The President. ——the American Rescue Plan helped working people and left no one behind. And, folks—and it worked. It worked. It worked and created jobs, lots of jobs. In fact, our economy created over 6.5 million new jobs just last year, more jobs in 1 year than ever before in the history of the United States of America. The economy grew at a rate of 5.7 last year, the strongest growth rate in 40 years and the first step in'... 35254 more characters,\n", + " metadata: {\n", + " score: 0.16301880776882172,\n", + " title: 'Address Before a Joint Session of the Congress on the State of the Union',\n", + " id: 'https://www.presidency.ucsb.edu/documents/address-before-joint-session-the-congress-the-state-the-union-28',\n", + " url: 'https://www.presidency.ucsb.edu/documents/address-before-joint-session-the-congress-the-state-the-union-28',\n", + " publishedDate: '2022-03-01',\n", + " author: ''\n", + " },\n", + " id: undefined\n", + " }\n", + "]\n" + ] + } + ], + "source": [ + "const query = \"What did the speaker say about Justice Breyer in the 2022 State of the Union?\";\n", + "\n", + "await retriever.invoke(query);" + ] + }, + { + "cell_type": "markdown", + "id": "dfe8aad4-8626-4330-98a9-7ea1ca5d2e0e", + "metadata": {}, + "source": [ + "## Use within a chain\n", + "\n", + "Like other retrievers, ExaRetriever can be incorporated into LLM applications via [chains](/docs/how_to/sequence/).\n", + "\n", + "We will need a LLM or chat model:\n", + "\n", + "```{=mdx}\n", + "import ChatModelTabs from \"@theme/ChatModelTabs\";\n", + "\n", + 
"\n", + "```" + ] + }, + { + "cell_type": "code", + "execution_count": 10, + "id": "25b647a3-f8f2-4541-a289-7a241e43f9df", + "metadata": {}, + "outputs": [], + "source": [ + "// @lc-docs-hide-cell\n", + "\n", + "import { ChatOpenAI } from \"@langchain/openai\";\n", + "\n", + "const llm = new ChatOpenAI({\n", + " model: \"gpt-4o-mini\",\n", + " temperature: 0,\n", + "});" + ] + }, + { + "cell_type": "code", + "execution_count": 11, + "id": "23e11cc9-abd6-4855-a7eb-799f45ca01ae", + "metadata": {}, + "outputs": [], + "source": [ + "import { ChatPromptTemplate } from \"@langchain/core/prompts\";\n", + "import { RunnablePassthrough, RunnableSequence } from \"@langchain/core/runnables\";\n", + "import { StringOutputParser } from \"@langchain/core/output_parsers\";\n", + "\n", + "import type { Document } from \"@langchain/core/documents\";\n", + "\n", + "const prompt = ChatPromptTemplate.fromTemplate(`\n", + "Answer the question based only on the context provided.\n", + "\n", + "Context: {context}\n", + "\n", + "Question: {question}`);\n", + "\n", + "const formatDocs = (docs: Document[]) => {\n", + " return docs.map((doc) => doc.pageContent).join(\"\\n\\n\");\n", + "}\n", + "\n", + "// See https://js.langchain.com/v0.2/docs/tutorials/rag\n", + "const ragChain = RunnableSequence.from([\n", + " {\n", + " context: retriever.pipe(formatDocs),\n", + " question: new RunnablePassthrough(),\n", + " },\n", + " prompt,\n", + " llm,\n", + " new StringOutputParser(),\n", + "]);" + ] + }, + { + "cell_type": "code", + "execution_count": 12, + "id": "d47c37dd-5c11-416c-a3b6-bec413cd70e8", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "In the 2022 State of the Union Address, the speaker, President Biden, honored Justice Breyer, describing him as someone who has dedicated his life to serve the country. He acknowledged Justice Breyer as an Army veteran and a constitutional scholar, and he expressed gratitude for his service. 
President Biden also mentioned that one of the most serious constitutional responsibilities of a President is nominating someone to serve on the United States Supreme Court, and he highlighted his nomination of Ketanji Brown Jackson to succeed Justice Breyer.\n" + ] + } + ], + "source": [ + "await ragChain.invoke(\"What did the speaker say about Justice Breyer in the 2022 State of the Union?\");" + ] + }, + { + "cell_type": "markdown", + "id": "3a5bb5ca-c3ae-4a58-be67-2cd18574b9a3", + "metadata": {}, + "source": [ + "## API reference\n", + "\n", + "For detailed documentation of all ExaRetriever features and configurations head to the [API reference](https://api.js.langchain.com/classes/langchain_exa.ExaRetriever.html)." + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "TypeScript", + "language": "typescript", + "name": "tslab" + }, + "language_info": { + "codemirror_mode": { + "mode": "typescript", + "name": "javascript", + "typescript": true + }, + "file_extension": ".ts", + "mimetype": "text/typescript", + "name": "typescript", + "version": "3.7.2" + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} diff --git a/docs/core_docs/docs/integrations/retrievers/exa.mdx b/docs/core_docs/docs/integrations/retrievers/exa.mdx deleted file mode 100644 index 14e2d78e23d7..000000000000 --- a/docs/core_docs/docs/integrations/retrievers/exa.mdx +++ /dev/null @@ -1,24 +0,0 @@ -# Exa Search - -The Exa Search API provides a new search experience designed for LLMs. - -## Usage - -First, install the LangChain integration package for Exa: - -import IntegrationInstallTooltip from "@mdx_components/integration_install_tooltip.mdx"; - - - -```bash npm2yarn -npm install @langchain/exa -``` - -You'll need to set your API key as an environment variable. - -The `Exa` class defaults to `EXASEARCH_API_KEY` when searching for your API key. 
- -import CodeBlock from "@theme/CodeBlock"; -import Example from "@examples/retrievers/exa.ts"; - -{Example} diff --git a/docs/core_docs/docs/integrations/text_embedding/openai.ipynb b/docs/core_docs/docs/integrations/text_embedding/openai.ipynb new file mode 100644 index 000000000000..3c820eec3d55 --- /dev/null +++ b/docs/core_docs/docs/integrations/text_embedding/openai.ipynb @@ -0,0 +1,418 @@ +{ + "cells": [ + { + "cell_type": "raw", + "id": "afaf8039", + "metadata": { + "vscode": { + "languageId": "raw" + } + }, + "source": [ + "---\n", + "sidebar_label: OpenAI\n", + "---" + ] + }, + { + "cell_type": "markdown", + "id": "9a3d6f34", + "metadata": {}, + "source": [ + "# OpenAI\n", + "\n", + "This will help you get started with OpenAIEmbeddings [embedding models](/docs/concepts#embedding-models) using LangChain. For detailed documentation on `OpenAIEmbeddings` features and configuration options, please refer to the [API reference](https://api.js.langchain.com/classes/langchain_openai.OpenAIEmbeddings.html).\n", + "\n", + "## Overview\n", + "### Integration details\n", + "\n", + "| Class | Package | Local | [Py support](https://python.langchain.com/docs/integrations/text_embedding/openai/) | Package downloads | Package latest |\n", + "| :--- | :--- | :---: | :---: | :---: | :---: |\n", + "| [OpenAIEmbeddings](https://api.js.langchain.com/classes/langchain_openai.OpenAIEmbeddings.html) | [@langchain/openai](https://api.js.langchain.com/modules/langchain_openai.html) | ❌ | ✅ | ![NPM - Downloads](https://img.shields.io/npm/dm/@langchain/openai?style=flat-square&label=%20&) | ![NPM - Version](https://img.shields.io/npm/v/@langchain/openai?style=flat-square&label=%20&) |\n", + "\n", + "## Setup\n", + "\n", + "To access OpenAIEmbeddings embedding models you'll need to create an OpenAI account, get an API key, and install the `@langchain/openai` integration package.\n", + "\n", + "### Credentials\n", + "\n", + "Head to [platform.openai.com](https://platform.openai.com) 
to sign up to OpenAI and generate an API key. Once you've done this set the `OPENAI_API_KEY` environment variable:\n", + "\n", + "```bash\n", + "export OPENAI_API_KEY=\"your-api-key\"\n", + "```\n", + "\n", + "If you want to get automated tracing of your model calls you can also set your [LangSmith](https://docs.smith.langchain.com/) API key by uncommenting below:\n", + "\n", + "```bash\n", + "# export LANGCHAIN_TRACING_V2=\"true\"\n", + "# export LANGCHAIN_API_KEY=\"your-api-key\"\n", + "```\n", + "\n", + "### Installation\n", + "\n", + "The LangChain OpenAIEmbeddings integration lives in the `@langchain/openai` package:\n", + "\n", + "```{=mdx}\n", + "import IntegrationInstallTooltip from \"@mdx_components/integration_install_tooltip.mdx\";\n", + "import Npm2Yarn from \"@theme/Npm2Yarn\";\n", + "\n", + "\n", + "\n", + "\n", + " @langchain/openai\n", + "\n", + "```" + ] + }, + { + "cell_type": "markdown", + "id": "45dd1724", + "metadata": {}, + "source": [ + "## Instantiation\n", + "\n", + "Now we can instantiate our model object and generate embeddings:" + ] + }, + { + "cell_type": "code", + "execution_count": 1, + "id": "9ea7a09b", + "metadata": {}, + "outputs": [], + "source": [ + "import { OpenAIEmbeddings } from \"@langchain/openai\";\n", + "\n", + "const embeddings = new OpenAIEmbeddings({\n", + " apiKey: \"YOUR-API-KEY\", // In Node.js defaults to process.env.OPENAI_API_KEY\n", + " batchSize: 512, // Default value if omitted is 512. Max is 2048\n", + " model: \"text-embedding-3-large\",\n", + "});" + ] + }, + { + "cell_type": "markdown", + "id": "fb4153d3", + "metadata": {}, + "source": [ + "If you're part of an organization, you can set `process.env.OPENAI_ORGANIZATION` to your OpenAI organization id, or pass it in as `organization` when\n", + "initializing the model." 
+ ] + }, + { + "cell_type": "markdown", + "id": "77d271b6", + "metadata": {}, + "source": [ + "## Indexing and Retrieval\n", + "\n", + "Embedding models are often used in retrieval-augmented generation (RAG) flows, both as part of indexing data as well as later retrieving it. For more detailed instructions, please see our RAG tutorials under the [working with external knowledge tutorials](/docs/tutorials/#working-with-external-knowledge).\n", + "\n", + "Below, see how to index and retrieve data using the `embeddings` object we initialized above. In this example, we will index and retrieve a sample document using the demo [`MemoryVectorStore`](/docs/integrations/vectorstores/memory)." + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "id": "d817716b", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "LangChain is the framework for building context-aware reasoning applications\n" + ] + } + ], + "source": [ + "// Create a vector store with a sample text\n", + "import { MemoryVectorStore } from \"langchain/vectorstores/memory\";\n", + "\n", + "const text = \"LangChain is the framework for building context-aware reasoning applications\";\n", + "\n", + "const vectorstore = await MemoryVectorStore.fromDocuments(\n", + " [{ pageContent: text, metadata: {} }],\n", + " embeddings,\n", + ");\n", + "\n", + "// Use the vector store as a retriever that returns a single document\n", + "const retriever = vectorstore.asRetriever(1);\n", + "\n", + "// Retrieve the most similar text\n", + "const retrievedDocuments = await retriever.invoke(\"What is LangChain?\");\n", + "\n", + "retrievedDocuments[0].pageContent;" + ] + }, + { + "cell_type": "markdown", + "id": "e02b9855", + "metadata": {}, + "source": [ + "## Direct Usage\n", + "\n", + "Under the hood, the vectorstore and retriever implementations are calling `embeddings.embedDocuments(...)` and `embeddings.embedQuery(...)` to create embeddings for the text(s) used in 
`fromDocuments` and the retriever's `invoke` operations, respectively.\n", + "\n", + "You can directly call these methods to get embeddings for your own use cases.\n", + "\n", + "### Embed single texts\n", + "\n", + "You can embed queries for search with `embedQuery`. This generates a vector representation specific to the query:" + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "id": "0d2befcd", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "[\n", + " -0.01927683, 0.0037708976, -0.032942563, 0.0037671267, 0.008175306,\n", + " -0.012511838, -0.009713832, 0.021403614, -0.015377721, 0.0018684798,\n", + " 0.020574018, 0.022399133, -0.02322873, -0.01524951, -0.00504169,\n", + " -0.007375876, -0.03448109, 0.00015130726, 0.021388533, -0.012564631,\n", + " -0.020031009, 0.027406884, -0.039217334, 0.03036327, 0.030393435,\n", + " -0.021750538, 0.032610722, -0.021162277, -0.025898525, 0.018869571,\n", + " 0.034179416, -0.013371604, 0.0037652412, -0.02146395, 0.0012641934,\n", + " -0.055688616, 0.05104287, 0.0024982197, -0.019095825, 0.0037369595,\n", + " 0.00088757504, 0.025189597, -0.018779071, 0.024978427, 0.016833287,\n", + " -0.0025868358, -0.011727491, -0.0021154736, -0.017738303, 0.0013839195,\n", + " -0.0131151825, -0.05405959, 0.029729757, -0.003393808, 0.019774588,\n", + " 0.028885076, 0.004355387, 0.026094612, 0.06479911, 0.038040817,\n", + " -0.03478276, -0.012594799, -0.024767255, -0.0031430433, 0.017874055,\n", + " -0.015294761, 0.005709139, 0.025355516, 0.044798266, 0.02549127,\n", + " -0.02524993, 0.00014553308, -0.019427665, -0.023545485, 0.008748483,\n", + " 0.019850006, -0.028417485, -0.001860938, -0.02318348, -0.010799851,\n", + " 0.04793565, -0.0048983963, 0.02193154, -0.026411368, 0.026426451,\n", + " -0.012149832, 0.035355937, -0.047814984, -0.027165547, -0.008228099,\n", + " -0.007737882, 0.023726488, -0.046487626, -0.007783133, -0.019638835,\n", + " 0.01793439, -0.018024892, 
0.0030336871, -0.019578502, 0.0042837397\n", + "]\n" + ] + } + ], + "source": [ + "const singleVector = await embeddings.embedQuery(text);\n", + "\n", + "console.log(singleVector.slice(0, 100));" + ] + }, + { + "cell_type": "markdown", + "id": "1b5a7d03", + "metadata": {}, + "source": [ + "### Embed multiple texts\n", + "\n", + "You can embed multiple texts for indexing with `embedDocuments`. The internals used for this method may (but do not have to) differ from embedding queries:" + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "id": "2f4d6e97", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "[\n", + " -0.01927683, 0.0037708976, -0.032942563, 0.0037671267, 0.008175306,\n", + " -0.012511838, -0.009713832, 0.021403614, -0.015377721, 0.0018684798,\n", + " 0.020574018, 0.022399133, -0.02322873, -0.01524951, -0.00504169,\n", + " -0.007375876, -0.03448109, 0.00015130726, 0.021388533, -0.012564631,\n", + " -0.020031009, 0.027406884, -0.039217334, 0.03036327, 0.030393435,\n", + " -0.021750538, 0.032610722, -0.021162277, -0.025898525, 0.018869571,\n", + " 0.034179416, -0.013371604, 0.0037652412, -0.02146395, 0.0012641934,\n", + " -0.055688616, 0.05104287, 0.0024982197, -0.019095825, 0.0037369595,\n", + " 0.00088757504, 0.025189597, -0.018779071, 0.024978427, 0.016833287,\n", + " -0.0025868358, -0.011727491, -0.0021154736, -0.017738303, 0.0013839195,\n", + " -0.0131151825, -0.05405959, 0.029729757, -0.003393808, 0.019774588,\n", + " 0.028885076, 0.004355387, 0.026094612, 0.06479911, 0.038040817,\n", + " -0.03478276, -0.012594799, -0.024767255, -0.0031430433, 0.017874055,\n", + " -0.015294761, 0.005709139, 0.025355516, 0.044798266, 0.02549127,\n", + " -0.02524993, 0.00014553308, -0.019427665, -0.023545485, 0.008748483,\n", + " 0.019850006, -0.028417485, -0.001860938, -0.02318348, -0.010799851,\n", + " 0.04793565, -0.0048983963, 0.02193154, -0.026411368, 0.026426451,\n", + " -0.012149832, 0.035355937, 
-0.047814984, -0.027165547, -0.008228099,\n", + " -0.007737882, 0.023726488, -0.046487626, -0.007783133, -0.019638835,\n", + " 0.01793439, -0.018024892, 0.0030336871, -0.019578502, 0.0042837397\n", + "]\n", + "[\n", + " -0.010181213, 0.023419594, -0.04215527, -0.0015320902, -0.023573855,\n", + " -0.0091644935, -0.014893179, 0.019016149, -0.023475688, 0.0010219777,\n", + " 0.009255648, 0.03996757, -0.04366983, -0.01640774, -0.020194141,\n", + " 0.019408813, -0.027977299, -0.022017224, 0.013539891, -0.007769135,\n", + " 0.032647192, -0.015089511, -0.022900717, 0.023798235, 0.026084099,\n", + " -0.024625633, 0.035003178, -0.017978394, -0.049615882, 0.013364594,\n", + " 0.031132633, 0.019142363, 0.023195215, -0.038396914, 0.005584942,\n", + " -0.031946007, 0.053682756, -0.0036356465, 0.011240003, 0.0056690844,\n", + " -0.0062791156, 0.044146635, -0.037387207, 0.01300699, 0.018946031,\n", + " 0.0050415234, 0.029618073, -0.021750772, -0.000649473, 0.00026951815,\n", + " -0.014710871, -0.029814405, 0.04204308, -0.014710871, 0.0039616977,\n", + " -0.021512369, 0.054608323, 0.021484323, 0.02790718, -0.010573876,\n", + " -0.023952495, -0.035143413, -0.048802506, -0.0075798146, 0.023279356,\n", + " -0.022690361, -0.016590048, 0.0060477243, 0.014100839, 0.005476258,\n", + " -0.017221114, -0.0100059165, -0.017922299, -0.021989176, 0.01830094,\n", + " 0.05516927, 0.001033372, 0.0017310516, -0.00960624, -0.037864015,\n", + " 0.013063084, 0.006591143, -0.010160177, 0.0011394264, 0.04953174,\n", + " 0.004806626, 0.029421741, -0.037751824, 0.003618117, 0.007162609,\n", + " 0.027696826, -0.0021070621, -0.024485396, -0.0042141243, -0.02801937,\n", + " -0.019605145, 0.016281527, -0.035143413, 0.01640774, 0.042323552\n", + "]\n" + ] + } + ], + "source": [ + "const text2 = \"LangGraph is a library for building stateful, multi-actor applications with LLMs\";\n", + "\n", + "const vectors = await embeddings.embedDocuments([text, text2]);\n", + "\n", + "console.log(vectors[0].slice(0, 
100));\n", + "console.log(vectors[1].slice(0, 100));" + ] + }, + { + "cell_type": "markdown", + "id": "2b1a3527", + "metadata": {}, + "source": [ + "## Specifying dimensions\n", + "\n", + "With the `text-embedding-3` class of models, you can specify the size of the embeddings you want returned. For example by default `text-embedding-3-large` returns embeddings of dimension 3072:\n" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "id": "a611fe1a", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "3072\n" + ] + } + ], + "source": [ + "import { OpenAIEmbeddings } from \"@langchain/openai\";\n", + "\n", + "const embeddingsDefaultDimensions = new OpenAIEmbeddings({\n", + " model: \"text-embedding-3-large\",\n", + "});\n", + "\n", + "const vectorsDefaultDimensions = await embeddingsDefaultDimensions.embedDocuments([\"some text\"]);\n", + "console.log(vectorsDefaultDimensions[0].length);" + ] + }, + { + "cell_type": "markdown", + "id": "08efe771", + "metadata": {}, + "source": [ + "But by passing in `dimensions: 1024` we can reduce the size of our embeddings to 1024:" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "id": "19667fdb", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "1024\n" + ] + } + ], + "source": [ + "import { OpenAIEmbeddings } from \"@langchain/openai\";\n", + "\n", + "const embeddings1024 = new OpenAIEmbeddings({\n", + " model: \"text-embedding-3-large\",\n", + " dimensions: 1024,\n", + "});\n", + "\n", + "const vectors1024 = await embeddings1024.embedDocuments([\"some text\"]);\n", + "console.log(vectors1024[0].length);" + ] + }, + { + "cell_type": "markdown", + "id": "6b84c0df", + "metadata": {}, + "source": [ + "## Custom URLs\n", + "\n", + "You can customize the base URL the SDK sends requests to by passing a `configuration` parameter like this:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": 
"3bfa20a6", + "metadata": {}, + "outputs": [], + "source": [ + "import { OpenAIEmbeddings } from \"@langchain/openai\";\n", + "\n", + "const model = new OpenAIEmbeddings({\n", + " configuration: {\n", + " baseURL: \"https://your_custom_url.com\",\n", + " },\n", + "});" + ] + }, + { + "cell_type": "markdown", + "id": "ac3cac9b", + "metadata": {}, + "source": [ + "You can also pass other `ClientOptions` parameters accepted by the official SDK.\n", + "\n", + "If you are hosting on Azure OpenAI, see the [dedicated page instead](/docs/integrations/text_embedding/azure_openai)." + ] + }, + { + "cell_type": "markdown", + "id": "8938e581", + "metadata": {}, + "source": [ + "## API reference\n", + "\n", + "For detailed documentation of all OpenAIEmbeddings features and configurations head to the API reference: https://api.js.langchain.com/classes/langchain_openai.OpenAIEmbeddings.html" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "TypeScript", + "language": "typescript", + "name": "tslab" + }, + "language_info": { + "codemirror_mode": { + "mode": "typescript", + "name": "javascript", + "typescript": true + }, + "file_extension": ".ts", + "mimetype": "text/typescript", + "name": "typescript", + "version": "3.7.2" + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} diff --git a/docs/core_docs/docs/integrations/text_embedding/openai.mdx b/docs/core_docs/docs/integrations/text_embedding/openai.mdx deleted file mode 100644 index bba94b0777ee..000000000000 --- a/docs/core_docs/docs/integrations/text_embedding/openai.mdx +++ /dev/null @@ -1,77 +0,0 @@ ---- -keywords: [openaiembeddings] ---- - -# OpenAI - -The `OpenAIEmbeddings` class uses the OpenAI API to generate embeddings for a given text. By default it strips new line characters from the text, as recommended by OpenAI, but you can disable this by passing `stripNewLines: false` to the constructor. 
- -import IntegrationInstallTooltip from "@mdx_components/integration_install_tooltip.mdx"; - - - -```bash npm2yarn -npm install @langchain/openai -``` - -```typescript -import { OpenAIEmbeddings } from "@langchain/openai"; - -const embeddings = new OpenAIEmbeddings({ - apiKey: "YOUR-API-KEY", // In Node.js defaults to process.env.OPENAI_API_KEY - batchSize: 512, // Default value if omitted is 512. Max is 2048 - model: "text-embedding-3-large", -}); -``` - -If you're part of an organization, you can set `process.env.OPENAI_ORGANIZATION` to your OpenAI organization id, or pass it in as `organization` when -initializing the model. - -## Specifying dimensions - -With the `text-embedding-3` class of models, you can specify the size of the embeddings you want returned. For example by default `text-embedding-3-large` returns embeddings of dimension 3072: - -```typescript -const embeddings = new OpenAIEmbeddings({ - model: "text-embedding-3-large", -}); - -const vectors = await embeddings.embedDocuments(["some text"]); -console.log(vectors[0].length); -``` - -``` -3072 -``` - -But by passing in `dimensions: 1024` we can reduce the size of our embeddings to 1024: - -```typescript -const embeddings1024 = new OpenAIEmbeddings({ - model: "text-embedding-3-large", - dimensions: 1024, -}); - -const vectors2 = await embeddings1024.embedDocuments(["some text"]); -console.log(vectors2[0].length); -``` - -``` -1024 -``` - -## Custom URLs - -You can customize the base URL the SDK sends requests to by passing a `configuration` parameter like this: - -```typescript -const model = new OpenAIEmbeddings({ - configuration: { - baseURL: "https://your_custom_url.com", - }, -}); -``` - -You can also pass other `ClientOptions` parameters accepted by the official SDK. - -If you are hosting on Azure OpenAI, see the [dedicated page instead](/docs/integrations/text_embedding/azure_openai). 
diff --git a/docs/core_docs/docs/integrations/vectorstores/memory.mdx b/docs/core_docs/docs/integrations/vectorstores/memory.mdx index fc133e3feeb6..df34fb92a72c 100644 --- a/docs/core_docs/docs/integrations/vectorstores/memory.mdx +++ b/docs/core_docs/docs/integrations/vectorstores/memory.mdx @@ -34,6 +34,18 @@ import ExampleLoader from "@examples/indexes/vector_stores/memory_fromdocs.ts"; ### Use a custom similarity metric -import ExampleCustom from "@examples/indexes/vector_stores/memory_custom_similarity.ts"; - -{ExampleCustom} +```ts +import { MemoryVectorStore } from "langchain/vectorstores/memory"; +import { OpenAIEmbeddings } from "@langchain/openai"; +import { similarity } from "ml-distance"; + +const vectorStore = await MemoryVectorStore.fromTexts( + ["Hello world", "Bye bye", "hello nice world"], + [{ id: 2 }, { id: 1 }, { id: 3 }], + new OpenAIEmbeddings(), + { similarity: similarity.pearson } +); + +const resultOne = await vectorStore.similaritySearch("hello world", 1); +console.log(resultOne); +``` diff --git a/docs/core_docs/scripts/quarto-build.js b/docs/core_docs/scripts/quarto-build.js index e502be91dd7d..128111f8d095 100644 --- a/docs/core_docs/scripts/quarto-build.js +++ b/docs/core_docs/scripts/quarto-build.js @@ -3,6 +3,7 @@ const { glob } = require("glob"); const { execSync } = require("node:child_process"); const IGNORED_CELL_REGEX = /```\w*?\n\/\/ ?@lc-docs-hide-cell\n[\s\S]*?```/g; +const LC_TS_IGNORE_REGEX = /\/\/ ?@lc-ts-ignore\n/g; async function main() { const allIpynb = await glob("./docs/**/*.ipynb"); @@ -20,8 +21,13 @@ async function main() { for (const renamedFilepath of allRenames) { if (fs.existsSync(renamedFilepath)) { let content = fs.readFileSync(renamedFilepath).toString(); - if (content.match(IGNORED_CELL_REGEX)) { - content = content.replace(IGNORED_CELL_REGEX, ""); + if ( + content.match(IGNORED_CELL_REGEX) || + content.match(LC_TS_IGNORE_REGEX) + ) { + content = content + .replace(IGNORED_CELL_REGEX, "") + 
.replace(LC_TS_IGNORE_REGEX, ""); fs.writeFileSync(renamedFilepath, content); } } diff --git a/docs/core_docs/scripts/validate_notebook.ts b/docs/core_docs/scripts/validate_notebook.ts index 7afdd366cc4a..fd0c4cba7242 100644 --- a/docs/core_docs/scripts/validate_notebook.ts +++ b/docs/core_docs/scripts/validate_notebook.ts @@ -8,8 +8,11 @@ export function extract(filepath: string) { const sourceFile = project.createSourceFile("temp.ts", ""); cells.forEach((cell: Record) => { + const source = cell.source + .join("") + .replace(/\/\/ ?@lc-ts-ignore/g, "// @ts-ignore"); if (cell.cell_type === "code") { - sourceFile.addStatements(cell.source.join("")); + sourceFile.addStatements(source); } }); diff --git a/environment_tests/docker-compose.yml b/environment_tests/docker-compose.yml index aeaed8bd108b..fec57cef9a16 100644 --- a/environment_tests/docker-compose.yml +++ b/environment_tests/docker-compose.yml @@ -152,6 +152,8 @@ services: condition: service_completed_successfully test-exports-esm: condition: service_completed_successfully + test-exports-tsc: + condition: service_completed_successfully test-exports-cjs: condition: service_completed_successfully test-exports-cf: diff --git a/examples/package.json b/examples/package.json index 5be71f5be945..4c97b78e92c5 100644 --- a/examples/package.json +++ b/examples/package.json @@ -91,7 +91,6 @@ "js-yaml": "^4.1.0", "langchain": "workspace:*", "langsmith": "^0.1.30", - "ml-distance": "^4.0.0", "mongodb": "^6.3.0", "pg": "^8.11.0", "pickleparser": "^0.2.1", diff --git a/examples/src/indexes/vector_stores/memory_custom_similarity.ts b/examples/src/indexes/vector_stores/memory_custom_similarity.ts deleted file mode 100644 index 6d7453f0e356..000000000000 --- a/examples/src/indexes/vector_stores/memory_custom_similarity.ts +++ /dev/null @@ -1,13 +0,0 @@ -import { MemoryVectorStore } from "langchain/vectorstores/memory"; -import { OpenAIEmbeddings } from "@langchain/openai"; -import { similarity } from "ml-distance"; - -const 
vectorStore = await MemoryVectorStore.fromTexts( - ["Hello world", "Bye bye", "hello nice world"], - [{ id: 2 }, { id: 1 }, { id: 3 }], - new OpenAIEmbeddings(), - { similarity: similarity.pearson } -); - -const resultOne = await vectorStore.similaritySearch("hello world", 1); -console.log(resultOne); diff --git a/langchain-core/.eslintrc.cjs b/langchain-core/.eslintrc.cjs index 39b49268e8ee..e903d44e5c71 100644 --- a/langchain-core/.eslintrc.cjs +++ b/langchain-core/.eslintrc.cjs @@ -62,6 +62,7 @@ module.exports = { "no-use-before-define": 0, "no-useless-constructor": 0, "no-return-await": 0, + "no-plusplus": 0, "consistent-return": 0, "no-else-return": 0, "func-names": 0, diff --git a/langchain-core/package.json b/langchain-core/package.json index 387f154b5170..2301a805215d 100644 --- a/langchain-core/package.json +++ b/langchain-core/package.json @@ -1,6 +1,6 @@ { "name": "@langchain/core", - "version": "0.2.18", + "version": "0.2.19", "description": "Core LangChain.js abstractions and schemas", "type": "module", "engines": { @@ -46,7 +46,6 @@ "decamelize": "1.2.0", "js-tiktoken": "^1.0.12", "langsmith": "~0.1.39", - "ml-distance": "^4.0.0", "mustache": "^4.2.0", "p-queue": "^6.6.2", "p-retry": "4", diff --git a/langchain-core/src/language_models/chat_models.ts b/langchain-core/src/language_models/chat_models.ts index 96a57348ffe4..bb952e43d918 100644 --- a/langchain-core/src/language_models/chat_models.ts +++ b/langchain-core/src/language_models/chat_models.ts @@ -123,17 +123,6 @@ export type LangSmithParams = { ls_stop?: Array; }; -interface ChatModelGenerateCachedParameters< - T extends BaseChatModel, - CallOptions extends BaseChatModelCallOptions = BaseChatModelCallOptions -> { - messages: BaseMessageLike[][]; - cache: BaseCache; - llmStringKey: string; - parsedOptions: T["ParsedCallOptions"]; - handledOptions: RunnableConfig; -} - /** * Base class for chat models. 
It extends the BaseLanguageModel class and * provides methods for generating chat based on input messages. @@ -449,9 +438,14 @@ export abstract class BaseChatModel< llmStringKey, parsedOptions, handledOptions, - }: ChatModelGenerateCachedParameters): Promise< - LLMResult & { missingPromptIndices: number[] } - > { + }: { + messages: BaseMessageLike[][]; + cache: BaseCache; + llmStringKey: string; + // eslint-disable-next-line @typescript-eslint/no-explicit-any + parsedOptions: any; + handledOptions: RunnableConfig; + }): Promise { const baseMessages = messages.map((messageList) => messageList.map(coerceMessageLikeToMessage) ); diff --git a/langchain-core/src/language_models/llms.ts b/langchain-core/src/language_models/llms.ts index a8cc4e941960..3aeb2a879bdc 100644 --- a/langchain-core/src/language_models/llms.ts +++ b/langchain-core/src/language_models/llms.ts @@ -43,18 +43,6 @@ export interface BaseLLMParams extends BaseLanguageModelParams { export interface BaseLLMCallOptions extends BaseLanguageModelCallOptions {} -interface LLMGenerateCachedParameters< - T extends BaseLLM, - CallOptions extends BaseLLMCallOptions = BaseLLMCallOptions -> { - prompts: string[]; - cache: BaseCache; - llmStringKey: string; - parsedOptions: T["ParsedCallOptions"]; - handledOptions: RunnableConfig; - runId?: string; -} - /** * LLM Wrapper. Takes in a prompt (or prompts) and returns a string. 
*/ @@ -351,9 +339,15 @@ export abstract class BaseLLM< parsedOptions, handledOptions, runId, - }: LLMGenerateCachedParameters): Promise< - LLMResult & { missingPromptIndices: number[] } - > { + }: { + prompts: string[]; + cache: BaseCache; + llmStringKey: string; + // eslint-disable-next-line @typescript-eslint/no-explicit-any + parsedOptions: any; + handledOptions: RunnableConfig; + runId?: string; + }): Promise { const callbackManager_ = await CallbackManager.configure( handledOptions.callbacks, this.callbacks, diff --git a/langchain-core/src/utils/math.ts b/langchain-core/src/utils/math.ts index fe703c2d5f79..68cf439831d7 100644 --- a/langchain-core/src/utils/math.ts +++ b/langchain-core/src/utils/math.ts @@ -1,7 +1,6 @@ -import { - similarity as ml_distance_similarity, - distance as ml_distance, -} from "ml-distance"; +import { cosine } from "./ml-distance/similarities.js"; +import { innerProduct as innerProductDistance } from "./ml-distance/distances.js"; +import { euclidean } from "./ml-distance-euclidean/euclidean.js"; type VectorFunction = (xVector: number[], yVector: number[]) => number; @@ -65,15 +64,15 @@ export function normalize(M: number[][], similarity = false): number[][] { * @returns {number[][] | [[]]} A matrix where each row represents the cosine similarity values between the corresponding rows of X and Y. 
*/ export function cosineSimilarity(X: number[][], Y: number[][]): number[][] { - return matrixFunc(X, Y, ml_distance_similarity.cosine); + return matrixFunc(X, Y, cosine); } export function innerProduct(X: number[][], Y: number[][]): number[][] { - return matrixFunc(X, Y, ml_distance.innerProduct); + return matrixFunc(X, Y, innerProductDistance); } export function euclideanDistance(X: number[][], Y: number[][]): number[][] { - return matrixFunc(X, Y, ml_distance.euclidean); + return matrixFunc(X, Y, euclidean); } /** diff --git a/langchain-core/src/utils/ml-distance-euclidean/LICENSE b/langchain-core/src/utils/ml-distance-euclidean/LICENSE new file mode 100644 index 000000000000..fa5c2fc3349d --- /dev/null +++ b/langchain-core/src/utils/ml-distance-euclidean/LICENSE @@ -0,0 +1,21 @@ +The MIT License (MIT) + +Copyright (c) 2015 ml.js + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in all +copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +SOFTWARE. 
diff --git a/langchain-core/src/utils/ml-distance-euclidean/euclidean.ts b/langchain-core/src/utils/ml-distance-euclidean/euclidean.ts new file mode 100644 index 000000000000..800638c60f0a --- /dev/null +++ b/langchain-core/src/utils/ml-distance-euclidean/euclidean.ts @@ -0,0 +1,11 @@ +export function squaredEuclidean(p: number[], q: number[]) { + let d = 0; + for (let i = 0; i < p.length; i++) { + d += (p[i] - q[i]) * (p[i] - q[i]); + } + return d; +} + +export function euclidean(p: number[], q: number[]) { + return Math.sqrt(squaredEuclidean(p, q)); +} diff --git a/langchain-core/src/utils/ml-distance/LICENSE b/langchain-core/src/utils/ml-distance/LICENSE new file mode 100644 index 000000000000..6b7d9fe61f96 --- /dev/null +++ b/langchain-core/src/utils/ml-distance/LICENSE @@ -0,0 +1,21 @@ +The MIT License (MIT) + +Copyright (c) 2014 ml.js + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in all +copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +SOFTWARE. 
\ No newline at end of file diff --git a/langchain-core/src/utils/ml-distance/distances.ts b/langchain-core/src/utils/ml-distance/distances.ts new file mode 100644 index 000000000000..98df550df9f5 --- /dev/null +++ b/langchain-core/src/utils/ml-distance/distances.ts @@ -0,0 +1,14 @@ +/** + * Returns the Inner Product similarity between vectors a and b + * @link [Inner Product Similarity algorithm](https://www.naun.org/main/NAUN/ijmmas/mmmas-49.pdf) + * @param a - first vector + * @param b - second vector + * + */ +export function innerProduct(a: number[], b: number[]): number { + let ans = 0; + for (let i = 0; i < a.length; i++) { + ans += a[i] * b[i]; + } + return ans; +} diff --git a/langchain-core/src/utils/ml-distance/similarities.ts b/langchain-core/src/utils/ml-distance/similarities.ts new file mode 100644 index 000000000000..9ea574e14a4f --- /dev/null +++ b/langchain-core/src/utils/ml-distance/similarities.ts @@ -0,0 +1,17 @@ +/** + * Returns the cosine similarity between vectors a and b + * @param a - first vector + * @param b - second vector + * + */ +export function cosine(a: number[], b: number[]): number { + let p = 0; + let p2 = 0; + let q2 = 0; + for (let i = 0; i < a.length; i++) { + p += a[i] * b[i]; + p2 += a[i] * a[i]; + q2 += b[i] * b[i]; + } + return p / (Math.sqrt(p2) * Math.sqrt(q2)); +} diff --git a/langchain-core/src/utils/testing/index.ts b/langchain-core/src/utils/testing/index.ts index 685fae8d3749..65d197f6c23e 100644 --- a/langchain-core/src/utils/testing/index.ts +++ b/langchain-core/src/utils/testing/index.ts @@ -2,7 +2,6 @@ /* eslint-disable @typescript-eslint/no-explicit-any */ /* eslint-disable @typescript-eslint/no-unused-vars */ -import { similarity as ml_distance_similarity } from "ml-distance"; import { z } from "zod"; import { BaseCallbackConfig, @@ -46,6 +45,7 @@ import { StructuredOutputMethodOptions, } from "../../language_models/base.js"; import { VectorStore } from "../../vectorstores.js"; +import { cosine } from
"../ml-distance/similarities.js"; /** * Parser for comma-separated values. It splits the input text by commas @@ -750,7 +750,7 @@ interface MemoryVector { * function. */ export interface FakeVectorStoreArgs { - similarity?: typeof ml_distance_similarity.cosine; + similarity?: typeof cosine; } /** @@ -763,7 +763,7 @@ export class FakeVectorStore extends VectorStore { memoryVectors: MemoryVector[] = []; - similarity: typeof ml_distance_similarity.cosine; + similarity: typeof cosine; _vectorstoreType(): string { return "memory"; @@ -775,7 +775,7 @@ export class FakeVectorStore extends VectorStore { ) { super(embeddings, rest); - this.similarity = similarity ?? ml_distance_similarity.cosine; + this.similarity = similarity ?? cosine; } /** diff --git a/langchain/.eslintrc.cjs b/langchain/.eslintrc.cjs index 2698aa6bf883..a0dc5fadc1ee 100644 --- a/langchain/.eslintrc.cjs +++ b/langchain/.eslintrc.cjs @@ -61,6 +61,7 @@ module.exports = { "no-use-before-define": 0, "no-useless-constructor": 0, "no-return-await": 0, + "no-plusplus": 0, "consistent-return": 0, "no-else-return": 0, "func-names": 0, diff --git a/langchain/package.json b/langchain/package.json index 7fa98d21ce8c..90b14e2b4ed1 100644 --- a/langchain/package.json +++ b/langchain/package.json @@ -941,9 +941,7 @@ "js-tiktoken": "^1.0.12", "js-yaml": "^4.1.0", "jsonpointer": "^5.0.1", - "langchainhub": "~0.0.8", - "langsmith": "~0.1.30", - "ml-distance": "^4.0.0", + "langsmith": "~0.1.40", "openapi-types": "^12.1.3", "p-retry": "4", "uuid": "^10.0.0", diff --git a/langchain/src/evaluation/embedding_distance/base.ts b/langchain/src/evaluation/embedding_distance/base.ts index ddecd7067c0f..eed8e453bb70 100644 --- a/langchain/src/evaluation/embedding_distance/base.ts +++ b/langchain/src/evaluation/embedding_distance/base.ts @@ -1,4 +1,3 @@ -import { distance, similarity } from "ml-distance"; import type { EmbeddingsInterface } from "@langchain/core/embeddings"; import { ChainValues } from "@langchain/core/utils/types"; 
import { OpenAIEmbeddings } from "@langchain/openai"; @@ -13,6 +12,9 @@ import { StringEvaluator, StringEvaluatorArgs, } from "../base.js"; +import { cosine } from "../../util/ml-distance/similarities.js"; +import { chebyshev, manhattan } from "../../util/ml-distance/distances.js"; +import { euclidean } from "../../util/ml-distance-euclidean/euclidean.js"; /** * @@ -58,10 +60,10 @@ export function getDistanceCalculationFunction( ): VectorFunction { const distanceFunctions: { [key in EmbeddingDistanceType]: VectorFunction } = { - cosine: (X: number[], Y: number[]) => 1.0 - similarity.cosine(X, Y), - euclidean: distance.euclidean, - manhattan: distance.manhattan, - chebyshev: distance.chebyshev, + cosine: (X: number[], Y: number[]) => 1.0 - cosine(X, Y), + euclidean, + manhattan, + chebyshev, }; return distanceFunctions[distanceType]; diff --git a/langchain/src/hub.ts b/langchain/src/hub.ts index ac2f958b08a6..53abe8e8c1fc 100644 --- a/langchain/src/hub.ts +++ b/langchain/src/hub.ts @@ -1,4 +1,4 @@ -import { Client, ClientConfiguration, HubPushOptions } from "langchainhub"; +import { Client } from "langsmith"; import { Runnable } from "@langchain/core/runnables"; import { load } from "./load/index.js"; @@ -13,10 +13,30 @@ import { load } from "./load/index.js"; export async function push( repoFullName: string, runnable: Runnable, - options?: HubPushOptions & ClientConfiguration + options?: { + apiKey?: string; + apiUrl?: string; + parentCommitHash?: string; + /** @deprecated Use isPublic instead. */ + newRepoIsPublic?: boolean; + isPublic?: boolean; + /** @deprecated Use description instead. */ + newRepoDescription?: string; + description?: string; + readme?: string; + tags?: string[]; + } ) { const client = new Client(options); - return client.push(repoFullName, JSON.stringify(runnable), options); + const payloadOptions = { + object: runnable, + parentCommitHash: options?.parentCommitHash, + isPublic: options?.isPublic ?? 
options?.newRepoIsPublic, + description: options?.description ?? options?.newRepoDescription, + readme: options?.readme, + tags: options?.tags, + }; + return client.pushPrompt(repoFullName, payloadOptions); } /** @@ -27,9 +47,11 @@ export async function push( */ export async function pull( ownerRepoCommit: string, - options?: ClientConfiguration + options?: { apiKey?: string; apiUrl?: string; includeModel?: boolean } ) { const client = new Client(options); - const result = await client.pull(ownerRepoCommit); + const result = await client._pullPrompt(ownerRepoCommit, { + includeModel: options?.includeModel, + }); return load(result); } diff --git a/langchain/src/util/ml-distance-euclidean/LICENSE b/langchain/src/util/ml-distance-euclidean/LICENSE new file mode 100644 index 000000000000..fa5c2fc3349d --- /dev/null +++ b/langchain/src/util/ml-distance-euclidean/LICENSE @@ -0,0 +1,21 @@ +The MIT License (MIT) + +Copyright (c) 2015 ml.js + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in all +copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +SOFTWARE. 
diff --git a/langchain/src/util/ml-distance-euclidean/euclidean.ts b/langchain/src/util/ml-distance-euclidean/euclidean.ts new file mode 100644 index 000000000000..800638c60f0a --- /dev/null +++ b/langchain/src/util/ml-distance-euclidean/euclidean.ts @@ -0,0 +1,11 @@ +export function squaredEuclidean(p: number[], q: number[]) { + let d = 0; + for (let i = 0; i < p.length; i++) { + d += (p[i] - q[i]) * (p[i] - q[i]); + } + return d; +} + +export function euclidean(p: number[], q: number[]) { + return Math.sqrt(squaredEuclidean(p, q)); +} diff --git a/langchain/src/util/ml-distance/LICENSE b/langchain/src/util/ml-distance/LICENSE new file mode 100644 index 000000000000..6b7d9fe61f96 --- /dev/null +++ b/langchain/src/util/ml-distance/LICENSE @@ -0,0 +1,21 @@ +The MIT License (MIT) + +Copyright (c) 2014 ml.js + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in all +copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +SOFTWARE. 
\ No newline at end of file diff --git a/langchain/src/util/ml-distance/distances.ts b/langchain/src/util/ml-distance/distances.ts new file mode 100644 index 000000000000..2f717c854d40 --- /dev/null +++ b/langchain/src/util/ml-distance/distances.ts @@ -0,0 +1,49 @@ +/** + * Returns the Inner Product similarity between vectors a and b + * @link [Inner Product Similarity algorithm](https://www.naun.org/main/NAUN/ijmmas/mmmas-49.pdf) + * @param a - first vector + * @param b - second vector + * + */ +export function innerProduct(a: number[], b: number[]): number { + let ans = 0; + for (let i = 0; i < a.length; i++) { + ans += a[i] * b[i]; + } + return ans; +} + +/** + * Returns the Chebyshev distance between vectors a and b + * @link [Chebyshev algorithm](https://en.wikipedia.org/wiki/Chebyshev_distance) + * @param a - first vector + * @param b - second vector + * + */ +export function chebyshev(a: number[], b: number[]): number { + let max = 0; + let aux = 0; + for (let i = 0; i < a.length; i++) { + aux = Math.abs(a[i] - b[i]); + if (max < aux) { + max = aux; + } + } + return max; +} + +/** + * Returns the Manhattan distance between vectors a and b + * @link [Manhattan algorithm](https://www.naun.org/main/NAUN/ijmmas/mmmas-49.pdf) + * @param a - first vector + * @param b - second vector + * + */ + +export function manhattan(a: number[], b: number[]): number { + let d = 0; + for (let i = 0; i < a.length; i++) { + d += Math.abs(a[i] - b[i]); + } + return d; +} diff --git a/langchain/src/util/ml-distance/similarities.ts b/langchain/src/util/ml-distance/similarities.ts new file mode 100644 index 000000000000..9ea574e14a4f --- /dev/null +++ b/langchain/src/util/ml-distance/similarities.ts @@ -0,0 +1,17 @@ +/** + * Returns the cosine similarity between vectors a and b + * @param a - first vector + * @param b - second vector + * + */ +export function cosine(a: number[], b: number[]): number { + let p = 0; + let p2 = 0; + let q2 = 0; + for (let i = 0; i < a.length;
i++) { + p += a[i] * b[i]; + p2 += a[i] * a[i]; + q2 += b[i] * b[i]; + } + return p / (Math.sqrt(p2) * Math.sqrt(q2)); +} diff --git a/langchain/src/vectorstores/memory.ts b/langchain/src/vectorstores/memory.ts index a584aa5b4333..6d7c03db9b4f 100644 --- a/langchain/src/vectorstores/memory.ts +++ b/langchain/src/vectorstores/memory.ts @@ -1,7 +1,7 @@ -import { similarity as ml_distance_similarity } from "ml-distance"; import { VectorStore } from "@langchain/core/vectorstores"; import type { EmbeddingsInterface } from "@langchain/core/embeddings"; import { Document } from "@langchain/core/documents"; +import { cosine } from "../util/ml-distance/similarities.js"; /** * Interface representing a vector in memory. It includes the content @@ -21,7 +21,7 @@ interface MemoryVector { * function. */ export interface MemoryVectorStoreArgs { - similarity?: typeof ml_distance_similarity.cosine; + similarity?: typeof cosine; } /** @@ -34,7 +34,7 @@ export class MemoryVectorStore extends VectorStore { memoryVectors: MemoryVector[] = []; - similarity: typeof ml_distance_similarity.cosine; + similarity: typeof cosine; _vectorstoreType(): string { return "memory"; @@ -46,7 +46,7 @@ export class MemoryVectorStore extends VectorStore { ) { super(embeddings, rest); - this.similarity = similarity ?? ml_distance_similarity.cosine; + this.similarity = similarity ?? 
cosine; } /** diff --git a/langchain/src/vectorstores/tests/memory.test.ts b/langchain/src/vectorstores/tests/memory.test.ts index d1f43bb4740e..a5134e0363c3 100644 --- a/langchain/src/vectorstores/tests/memory.test.ts +++ b/langchain/src/vectorstores/tests/memory.test.ts @@ -2,8 +2,8 @@ import { test, expect } from "@jest/globals"; import { Document, DocumentInterface } from "@langchain/core/documents"; import { SyntheticEmbeddings } from "@langchain/core/utils/testing"; -import { similarity } from "ml-distance"; import { MemoryVectorStore } from "../memory.js"; +import { cosine } from "../../util/ml-distance/similarities.js"; test("MemoryVectorStore with external ids", async () => { const embeddings = new SyntheticEmbeddings({ @@ -75,10 +75,10 @@ test("MemoryVectorStore with custom similarity", async () => { let similarityCalled = false; let similarityCalledCount = 0; const store = new MemoryVectorStore(embeddings, { - similarity: (a: number, b: number) => { + similarity: (a: number[], b: number[]) => { similarityCalledCount += 1; similarityCalled = true; - return similarity.cosine(a, b); + return cosine(a, b); }, }); diff --git a/libs/langchain-anthropic/package.json b/libs/langchain-anthropic/package.json index 19dc57c51eb9..7c38557109fa 100644 --- a/libs/langchain-anthropic/package.json +++ b/libs/langchain-anthropic/package.json @@ -1,6 +1,6 @@ { "name": "@langchain/anthropic", - "version": "0.2.11", + "version": "0.2.12", "description": "Anthropic integrations for LangChain.js", "type": "module", "engines": { diff --git a/libs/langchain-anthropic/src/output_parsers.ts b/libs/langchain-anthropic/src/output_parsers.ts index c5608900b4b9..b4a08be4d6ee 100644 --- a/libs/langchain-anthropic/src/output_parsers.ts +++ b/libs/langchain-anthropic/src/output_parsers.ts @@ -39,10 +39,28 @@ export class AnthropicToolsOutputParser< } protected async _validateResult(result: unknown): Promise { + let parsedResult = result; + if (typeof result === "string") { + try { + 
parsedResult = JSON.parse(result); + // eslint-disable-next-line @typescript-eslint/no-explicit-any + } catch (e: any) { + throw new OutputParserException( + `Failed to parse. Text: "${JSON.stringify( + result, + null, + 2 + )}". Error: ${JSON.stringify(e.message)}`, + result + ); + } + } else { + parsedResult = result; + } if (this.zodSchema === undefined) { - return result as T; + return parsedResult as T; } - const zodParsedResult = await this.zodSchema.safeParseAsync(result); + const zodParsedResult = await this.zodSchema.safeParseAsync(parsedResult); if (zodParsedResult.success) { return zodParsedResult.data; } else { @@ -52,7 +70,7 @@ export class AnthropicToolsOutputParser< null, 2 )}". Error: ${JSON.stringify(zodParsedResult.error.errors)}`, - JSON.stringify(result, null, 2) + JSON.stringify(parsedResult, null, 2) ); } } diff --git a/libs/langchain-anthropic/src/tests/chat_models-tools.int.test.ts b/libs/langchain-anthropic/src/tests/chat_models-tools.int.test.ts index 3af61e38f04e..7cd53c99a8b6 100644 --- a/libs/langchain-anthropic/src/tests/chat_models-tools.int.test.ts +++ b/libs/langchain-anthropic/src/tests/chat_models-tools.int.test.ts @@ -38,7 +38,7 @@ class WeatherTool extends StructuredTool { } const model = new ChatAnthropic({ - modelName: "claude-3-sonnet-20240229", + modelName: "claude-3-haiku-20240307", temperature: 0, }); @@ -440,3 +440,24 @@ test("llm token callbacks can handle tool calls", async () => { if (!args) return; expect(args).toEqual(JSON.parse(tokens)); }); + +test("streaming with structured output", async () => { + const stream = await model + .withStructuredOutput(zodSchema) + .stream("weather in london"); + // Currently, streaming yields a single chunk + let finalChunk; + for await (const chunk of stream) { + finalChunk = chunk; + } + expect(typeof finalChunk).toEqual("object"); + const stream2 = await model + .withStructuredOutput(zodToJsonSchema(zodSchema)) + .stream("weather in london"); + // Currently, streaming yields a 
single chunk + let finalChunk2; + for await (const chunk of stream2) { + finalChunk2 = chunk; + } + expect(typeof finalChunk2).toEqual("object"); +}); diff --git a/libs/langchain-baidu-qianfan/package.json b/libs/langchain-baidu-qianfan/package.json index a78ca4b7674c..9f18d20ba845 100644 --- a/libs/langchain-baidu-qianfan/package.json +++ b/libs/langchain-baidu-qianfan/package.json @@ -57,7 +57,6 @@ "eslint-plugin-prettier": "^4.2.1", "jest": "^29.5.0", "jest-environment-node": "^29.6.4", - "langchain": "0.2.5", "prettier": "^2.8.3", "release-it": "^15.10.1", "rollup": "^4.5.2", diff --git a/libs/langchain-google-common/src/types.ts b/libs/langchain-google-common/src/types.ts index a07c32e67555..305b7a7eda78 100644 --- a/libs/langchain-google-common/src/types.ts +++ b/libs/langchain-google-common/src/types.ts @@ -45,6 +45,7 @@ export interface GoogleConnectionParams export interface GoogleAISafetySetting { category: string; threshold: string; + method?: string; } export type GoogleAIResponseMimeType = "text/plain" | "application/json"; diff --git a/libs/langchain-scripts/src/cli/docs/document_loaders.ts b/libs/langchain-scripts/src/cli/docs/document_loaders.ts index 359b94034e44..9fe5118af0b4 100644 --- a/libs/langchain-scripts/src/cli/docs/document_loaders.ts +++ b/libs/langchain-scripts/src/cli/docs/document_loaders.ts @@ -9,24 +9,24 @@ import { } from "../utils/get-input.js"; const NODE_OR_WEB_PLACEHOLDER = "__fs_or_web__"; -const PACKAGE_NAME_PLACEHOLDER = "__package_name__"; +const NODE_OR_WEB_IMPORT_PATH_PLACEHOLDER = "__fs_or_web_import_path__"; +const FILE_NAME_PLACEHOLDER = "__file_name__"; const MODULE_NAME_PLACEHOLDER = "__ModuleName__"; -const PACKAGE_NAME_SHORT_SNAKE_CASE_PLACEHOLDER = - "__package_name_short_snake_case__"; -const PACKAGE_NAME_SNAKE_CASE_PLACEHOLDER = "__package_name_snake_case__"; -const PACKAGE_IMPORT_PATH_PLACEHOLDER = "__import_path__"; -// This should not be prefixed with `Chat` as it's used for API keys. 
-const MODULE_NAME_ALL_CAPS_PLACEHOLDER = "__MODULE_NAME_ALL_CAPS__"; +const API_REF_BASE_PACKAGE_URL = `https://api.js.langchain.com/modules/langchain_community_document_loaders_${NODE_OR_WEB_PLACEHOLDER}_${FILE_NAME_PLACEHOLDER}.html`; +const API_REF_BASE_MODULE_URL = `https://v02.api.js.langchain.com/classes/langchain_community_document_loaders_${NODE_OR_WEB_PLACEHOLDER}_${FILE_NAME_PLACEHOLDER}.${MODULE_NAME_PLACEHOLDER}.html`; const SERIALIZABLE_PLACEHOLDER = "__serializable__"; const LOCAL_PLACEHOLDER = "__local__"; const PY_SUPPORT_PLACEHOLDER = "__py_support__"; -const WEB_SUPPORT_PLACEHOLDER = "__web_support__"; const NODE_SUPPORT_PLACEHOLDER = "__fs_support__"; -const API_REF_BASE_MODULE_URL = `https://api.js.langchain.com/classes/langchain_community_document_loaders_${NODE_OR_WEB_PLACEHOLDER}_${PACKAGE_NAME_PLACEHOLDER}.${MODULE_NAME_PLACEHOLDER}.html`; +const NODE_ONLY_SIDEBAR_BADGE_PLACEHOLDER = "__node_only_sidebar__"; +const NODE_ONLY_TOOL_TIP_PLACEHOLDER = "__node_only_tooltip__"; + +// This should not be suffixed with `Loader` as it's used for API keys. 
+const MODULE_NAME_ALL_CAPS_PLACEHOLDER = "__MODULE_NAME_ALL_CAPS__"; const TEMPLATE_PATH = path.resolve( "./src/cli/docs/templates/document_loaders.ipynb" @@ -35,6 +35,10 @@ const INTEGRATIONS_DOCS_PATH = path.resolve( "../../docs/core_docs/docs/integrations/document_loaders" ); +const NODE_ONLY_TOOLTIP = + "```{=mdx}\n\n:::tip Compatibility\n\nOnly available on Node.js.\n\n:::\n\n```\n"; +const NODE_ONLY_SIDEBAR_BADGE = `sidebar_class_name: node-only`; + const fetchAPIRefUrl = async (url: string): Promise => { try { const res = await fetch(url); @@ -48,25 +52,25 @@ const fetchAPIRefUrl = async (url: string): Promise => { }; type ExtraFields = { - nodeSupport: boolean; - webSupport: boolean; + webLoader: boolean; + nodeOnly: boolean; serializable: boolean; pySupport: boolean; local: boolean; }; async function promptExtraFields(): Promise { - const hasNodeSupport = await getUserInput( - "Does this integration support Node environments? (y/n) ", + const isWebLoader = await getUserInput( + "Is this integration a web loader? (y/n) ", undefined, true ); - const hasWebSupport = await getUserInput( - "Does this integration support web environments? (y/n) ", + const isNodeOnly = await getUserInput( + "Does this integration _only_ support Node environments? (y/n) ", undefined, true ); - const hasSerializable = await getUserInput( + const isSerializable = await getUserInput( "Does this integration support serializable output? 
(y/n) ", undefined, true @@ -83,9 +87,9 @@ async function promptExtraFields(): Promise { ); return { - nodeSupport: hasNodeSupport.toLowerCase() === "y", - webSupport: hasWebSupport.toLowerCase() === "y", - serializable: hasSerializable.toLowerCase() === "y", + webLoader: isWebLoader.toLowerCase() === "y", + nodeOnly: isNodeOnly.toLowerCase() === "y", + serializable: isSerializable.toLowerCase() === "y", pySupport: hasPySupport.toLowerCase() === "y", local: hasLocalSupport.toLowerCase() === "y", }; @@ -107,55 +111,64 @@ export async function fillDocLoaderIntegrationDocTemplate(fields: { extraFields = await promptExtraFields(); } + const formattedPackageApiRefUrl = API_REF_BASE_PACKAGE_URL.replace( + NODE_OR_WEB_PLACEHOLDER, + extraFields?.webLoader ? "web" : "fs" + ).replace(FILE_NAME_PLACEHOLDER, fields.packageName); + const formattedApiRefModuleUrl = API_REF_BASE_MODULE_URL.replace( - PACKAGE_NAME_PLACEHOLDER, - fields.packageName + NODE_OR_WEB_PLACEHOLDER, + extraFields?.webLoader ? "web" : "fs" ) - .replace(MODULE_NAME_PLACEHOLDER, fields.moduleName) - .replace(NODE_OR_WEB_PLACEHOLDER, extraFields?.webSupport ? "web" : "fs"); - - const success = await fetchAPIRefUrl(formattedApiRefModuleUrl); - if (!success) { + .replace(FILE_NAME_PLACEHOLDER, fields.packageName) + .replace(MODULE_NAME_PLACEHOLDER, fields.moduleName); + + const success = await Promise.all([ + fetchAPIRefUrl(formattedApiRefModuleUrl), + fetchAPIRefUrl(formattedPackageApiRefUrl), + ]); + if (success.find((s) => s === false)) { // Don't error out because this might be used before the package is released. console.error("Invalid package or module name. API reference not found."); } - const packageNameShortSnakeCase = fields.packageName.replaceAll("-", "_"); - const fullPackageNameSnakeCase = `langchain_community_document_loaders_${ - extraFields?.webSupport ? 
"web" : "fs" - }_${packageNameShortSnakeCase}`; - const fullPackageImportPath = `@langchain/community/document_loaders/${ - extraFields?.webSupport ? "web" : "fs" - }/${fields.packageName}`; - let moduleNameAllCaps = _.snakeCase(fields.moduleName).toUpperCase(); - if (moduleNameAllCaps.endsWith("DOCUMENT_LOADER")) { - moduleNameAllCaps = moduleNameAllCaps.replace("DOCUMENT_LOADER", ""); + if (moduleNameAllCaps.endsWith("_LOADER")) { + moduleNameAllCaps = moduleNameAllCaps.replace("_LOADER", ""); } const docTemplate = (await fs.promises.readFile(TEMPLATE_PATH, "utf-8")) - .replaceAll(PACKAGE_NAME_PLACEHOLDER, fields.packageName) - .replaceAll(PACKAGE_NAME_SNAKE_CASE_PLACEHOLDER, fullPackageNameSnakeCase) - .replaceAll( - PACKAGE_NAME_SHORT_SNAKE_CASE_PLACEHOLDER, - packageNameShortSnakeCase - ) - .replaceAll(PACKAGE_IMPORT_PATH_PLACEHOLDER, fullPackageImportPath) + .replaceAll(NODE_OR_WEB_PLACEHOLDER, extraFields?.webLoader ? "web" : "fs") .replaceAll(MODULE_NAME_PLACEHOLDER, fields.moduleName) .replaceAll(MODULE_NAME_ALL_CAPS_PLACEHOLDER, moduleNameAllCaps) - .replace(WEB_SUPPORT_PLACEHOLDER, extraFields?.webSupport ? "✅" : "❌") - .replace(NODE_SUPPORT_PLACEHOLDER, extraFields?.nodeSupport ? "✅" : "❌") - .replace(LOCAL_PLACEHOLDER, extraFields?.local ? "✅" : "❌") - .replace( + .replaceAll( + NODE_OR_WEB_IMPORT_PATH_PLACEHOLDER, + extraFields?.webLoader ? "web" : "fs" + ) + .replaceAll(FILE_NAME_PLACEHOLDER, fields.packageName) + .replaceAll( + NODE_ONLY_SIDEBAR_BADGE_PLACEHOLDER, + extraFields?.nodeOnly ? NODE_ONLY_SIDEBAR_BADGE : "" + ) + .replaceAll( + NODE_ONLY_TOOL_TIP_PLACEHOLDER, + extraFields?.nodeOnly ? NODE_ONLY_TOOLTIP : "" + ) + .replaceAll( + NODE_SUPPORT_PLACEHOLDER, + extraFields?.nodeOnly ? "Node-only" : "All environments" + ) + .replaceAll(LOCAL_PLACEHOLDER, extraFields?.local ? "✅" : "❌") + .replaceAll( SERIALIZABLE_PLACEHOLDER, - extraFields?.serializable ? "✅" : "beta" + extraFields?.serializable ? 
"beta" : "❌" ) - .replace(PY_SUPPORT_PLACEHOLDER, extraFields?.pySupport ? "✅" : "❌"); + .replaceAll(PY_SUPPORT_PLACEHOLDER, extraFields?.pySupport ? "✅" : "❌"); const docPath = path.join( INTEGRATIONS_DOCS_PATH, - extraFields?.webSupport ? "web_loaders" : "file_loaders", - `${packageNameShortSnakeCase}.ipynb` + extraFields?.webLoader ? "web_loaders" : "file_loaders", + `${fields.packageName}.ipynb` ); await fs.promises.writeFile(docPath, docTemplate); const prettyDocPath = docPath.split("docs/core_docs/")[1]; diff --git a/libs/langchain-scripts/src/cli/docs/embeddings.ts b/libs/langchain-scripts/src/cli/docs/embeddings.ts new file mode 100644 index 000000000000..03e1cc96f99e --- /dev/null +++ b/libs/langchain-scripts/src/cli/docs/embeddings.ts @@ -0,0 +1,186 @@ +import * as path from "node:path"; +import * as fs from "node:fs"; +import { + boldText, + getUserInput, + greenText, + redBackground, +} from "../utils/get-input.js"; + +const PACKAGE_NAME_PLACEHOLDER = "__package_name__"; +const MODULE_NAME_PLACEHOLDER = "__ModuleName__"; +const SIDEBAR_LABEL_PLACEHOLDER = "__sidebar_label__"; +const FULL_IMPORT_PATH_PLACEHOLDER = "__full_import_path__"; +const LOCAL_PLACEHOLDER = "__local__"; +const PY_SUPPORT_PLACEHOLDER = "__py_support__"; +const ENV_VAR_NAME_PLACEHOLDER = "__env_var_name__"; +const API_REF_MODULE_PLACEHOLDER = "__api_ref_module__"; +const API_REF_PACKAGE_PLACEHOLDER = "__api_ref_package__"; +const PYTHON_DOC_URL_PLACEHOLDER = "__python_doc_url__"; + +const TEMPLATE_PATH = path.resolve( + "./src/cli/docs/templates/text_embedding.ipynb" +); +const INTEGRATIONS_DOCS_PATH = path.resolve( + "../../docs/core_docs/docs/integrations/text_embedding" +); + +const fetchAPIRefUrl = async (url: string): Promise => { + try { + const res = await fetch(url); + if (res.status !== 200) { + throw new Error(`API Reference URL ${url} not found.`); + } + return true; + } catch (_) { + return false; + } +}; + +type ExtraFields = { + local: boolean; + pySupport: boolean; + 
packageName: string; + fullImportPath?: string; + envVarName: string; +}; + +async function promptExtraFields(fields: { + envVarGuess: string; + packageNameGuess: string; + isCommunity: boolean; +}): Promise { + const { envVarGuess, packageNameGuess, isCommunity } = fields; + const canRunLocally = await getUserInput( + "Does this embeddings model support local usage? (y/n) ", + undefined, + true + ); + const hasPySupport = await getUserInput( + "Does this integration have Python support? (y/n) ", + undefined, + true + ); + + let packageName = packageNameGuess; + if (!isCommunity) { + // If it's not community, get the package name. + + const isOtherPackageName = await getUserInput( + `Is this integration part of the ${packageNameGuess} package? (y/n) ` + ); + if (isOtherPackageName.toLowerCase() === "n") { + packageName = await getUserInput( + "What is the name of the package this integration is located in? (e.g @langchain/openai) ", + undefined, + true + ); + if ( + !packageName.startsWith("@langchain/") && + !packageName.startsWith("langchain/") + ) { + packageName = await getUserInput( + "Packages must start with either '@langchain/' or 'langchain/'. Please enter a valid package name: ", + undefined, + true + ); + } + } + } + + // If it's community or langchain, ask for the full import path + let fullImportPath: string | undefined; + if ( + packageName.startsWith("@langchain/community") || + packageName.startsWith("langchain/") + ) { + fullImportPath = await getUserInput( + "What is the full import path of the package? (e.g '@langchain/community/embeddings/togetherai') ", + undefined, + true + ); + } + + const envVarName = await getUserInput( + `Is the environment variable for the API key named ${envVarGuess}? 
If it is, reply with 'y', else reply with the correct name: `, + undefined, + true + ); + + return { + local: canRunLocally.toLowerCase() === "y", + pySupport: hasPySupport.toLowerCase() === "y", + packageName, + fullImportPath, + envVarName: + envVarName.toLowerCase() === "y" ? envVarGuess : envVarName.toUpperCase(), + }; +} + +export async function fillEmbeddingsIntegrationDocTemplate(fields: { + packageName: string; + moduleName: string; + isCommunity: boolean; +}) { + const sidebarLabel = fields.moduleName.replace("Embeddings", ""); + const pyDocUrl = `https://python.langchain.com/docs/integrations/text_embedding/${sidebarLabel.toLowerCase()}/`; + let envVarName = `${sidebarLabel.toUpperCase()}_API_KEY`; + const extraFields = await promptExtraFields({ + packageNameGuess: `@langchain/${fields.packageName}`, + envVarGuess: envVarName, + isCommunity: fields.isCommunity, + }); + envVarName = extraFields.envVarName; + const { pySupport } = extraFields; + const localSupport = extraFields.local; + const { packageName } = extraFields; + const fullImportPath = extraFields.fullImportPath ?? extraFields.packageName; + + const apiRefModuleUrl = `https://api.js.langchain.com/classes/${fullImportPath + .replace("@", "") + .replaceAll("/", "_") + .replaceAll("-", "_")}.${fields.moduleName}.html`; + const apiRefPackageUrl = apiRefModuleUrl + .replace("/classes/", "/modules/") + .replace(`.${fields.moduleName}.html`, ".html"); + + const apiRefUrlSuccesses = await Promise.all([ + fetchAPIRefUrl(apiRefModuleUrl), + fetchAPIRefUrl(apiRefPackageUrl), + ]); + if (apiRefUrlSuccesses.find((s) => !s)) { + console.warn( + "API ref URLs invalid. Please manually ensure they are correct." 
+ ); + } + + const docTemplate = (await fs.promises.readFile(TEMPLATE_PATH, "utf-8")) + .replaceAll(PACKAGE_NAME_PLACEHOLDER, packageName) + .replaceAll(MODULE_NAME_PLACEHOLDER, fields.moduleName) + .replaceAll(SIDEBAR_LABEL_PLACEHOLDER, sidebarLabel) + .replaceAll(FULL_IMPORT_PATH_PLACEHOLDER, fullImportPath) + .replaceAll(LOCAL_PLACEHOLDER, localSupport ? "✅" : "❌") + .replaceAll(PY_SUPPORT_PLACEHOLDER, pySupport ? "✅" : "❌") + .replaceAll(ENV_VAR_NAME_PLACEHOLDER, envVarName) + .replaceAll(API_REF_MODULE_PLACEHOLDER, apiRefModuleUrl) + .replaceAll(API_REF_PACKAGE_PLACEHOLDER, apiRefPackageUrl) + .replaceAll(PYTHON_DOC_URL_PLACEHOLDER, pyDocUrl); + + const docFileName = fullImportPath.split("/").pop(); + const docPath = path.join(INTEGRATIONS_DOCS_PATH, `${docFileName}.ipynb`); + await fs.promises.writeFile(docPath, docTemplate); + const prettyDocPath = docPath.split("docs/core_docs/")[1]; + + const updatePythonDocUrlText = ` ${redBackground( + "- Update the Python documentation URL with the proper URL." + )}`; + const successText = `\nSuccessfully created new chat model integration doc at ${prettyDocPath}.`; + + console.log( + `${greenText(successText)}\n +${boldText("Next steps:")} +${extraFields?.pySupport ? updatePythonDocUrlText : ""} + - Run all code cells in the generated doc to record the outputs. 
+ - Add extra sections on integration specific features.\n` + ); +} diff --git a/libs/langchain-scripts/src/cli/docs/index.ts b/libs/langchain-scripts/src/cli/docs/index.ts index a7a89745e7a1..9d7f5aa35a12 100644 --- a/libs/langchain-scripts/src/cli/docs/index.ts +++ b/libs/langchain-scripts/src/cli/docs/index.ts @@ -4,6 +4,9 @@ import { Command } from "commander"; import { fillChatIntegrationDocTemplate } from "./chat.js"; import { fillDocLoaderIntegrationDocTemplate } from "./document_loaders.js"; +import { fillLLMIntegrationDocTemplate } from "./llms.js"; +import { fillRetrieverIntegrationDocTemplate } from "./retrievers.js"; +import { fillEmbeddingsIntegrationDocTemplate } from "./embeddings.js"; type CLIInput = { package: string; @@ -49,9 +52,30 @@ async function main() { moduleName, }); break; + case "llm": + await fillLLMIntegrationDocTemplate({ + packageName, + moduleName, + isCommunity, + }); + break; + case "retriever": + await fillRetrieverIntegrationDocTemplate({ + packageName, + moduleName, + isCommunity, + }); + break; + case "embeddings": + await fillEmbeddingsIntegrationDocTemplate({ + packageName, + moduleName, + isCommunity, + }); + break; default: console.error( - `Invalid type: ${type}.\nOnly 'chat' and 'doc_loader' are supported at this time.` + `Invalid type: ${type}.\nOnly 'chat', 'llm', 'retriever', 'embeddings' and 'doc_loader' are supported at this time.` ); process.exit(1); } diff --git a/libs/langchain-scripts/src/cli/docs/llms.ts b/libs/langchain-scripts/src/cli/docs/llms.ts new file mode 100644 index 000000000000..bd538bb55ac1 --- /dev/null +++ b/libs/langchain-scripts/src/cli/docs/llms.ts @@ -0,0 +1,179 @@ +import * as path from "node:path"; +import * as fs from "node:fs"; +import { + boldText, + getUserInput, + greenText, + redBackground, +} from "../utils/get-input.js"; + +const PACKAGE_NAME_PLACEHOLDER = "__package_name__"; +const PACKAGE_NAME_SHORT_SNAKE_CASE_PLACEHOLDER = + "__package_name_short_snake_case__"; +const 
PACKAGE_NAME_SNAKE_CASE_PLACEHOLDER = "__package_name_snake_case__"; +const PACKAGE_NAME_PRETTY_PLACEHOLDER = "__package_name_pretty__"; +const PACKAGE_IMPORT_PATH_PLACEHOLDER = "__import_path__"; +const MODULE_NAME_PLACEHOLDER = "__ModuleName__"; +// This should not be prefixed with `Chat` as it's used for API keys. +const MODULE_NAME_ALL_CAPS_PLACEHOLDER = "__MODULE_NAME_ALL_CAPS__"; + +const SERIALIZABLE_PLACEHOLDER = "__serializable__"; +const LOCAL_PLACEHOLDER = "__local__"; +const PY_SUPPORT_PLACEHOLDER = "__py_support__"; + +const API_REF_BASE_PACKAGE_URL = `https://api.js.langchain.com/modules/langchain_${PACKAGE_NAME_PLACEHOLDER}.html`; +const API_REF_BASE_MODULE_URL = `https://api.js.langchain.com/classes/langchain_${PACKAGE_NAME_PLACEHOLDER}.${MODULE_NAME_PLACEHOLDER}.html`; + +const TEMPLATE_PATH = path.resolve("./src/cli/docs/templates/llms.ipynb"); +const INTEGRATIONS_DOCS_PATH = path.resolve( + "../../docs/core_docs/docs/integrations/llms" +); + +const fetchAPIRefUrl = async (url: string): Promise => { + try { + const res = await fetch(url); + if (res.status !== 200) { + throw new Error(`API Reference URL ${url} not found.`); + } + return true; + } catch (_) { + return false; + } +}; + +type ExtraFields = { + local: boolean; + serializable: boolean; + pySupport: boolean; +}; + +async function promptExtraFields(): Promise { + const hasLocal = await getUserInput( + "Does this integration support local usage? (y/n) ", + undefined, + true + ); + const hasSerializable = await getUserInput( + "Does this integration support serializable output? (y/n) ", + undefined, + true + ); + const hasPySupport = await getUserInput( + "Does this integration have Python support? 
(y/n) ", + undefined, + true + ); + + return { + local: hasLocal.toLowerCase() === "y", + serializable: hasSerializable.toLowerCase() === "y", + pySupport: hasPySupport.toLowerCase() === "y", + }; +} + +export async function fillLLMIntegrationDocTemplate(fields: { + packageName: string; + moduleName: string; + isCommunity: boolean; +}) { + // Ask the user if they'd like to fill in extra fields, if so, prompt them. + let extraFields: ExtraFields | undefined; + const shouldPromptExtraFields = await getUserInput( + "Would you like to fill out optional fields? (y/n) ", + "white_background" + ); + if (shouldPromptExtraFields.toLowerCase() === "y") { + extraFields = await promptExtraFields(); + } + + let formattedApiRefPackageUrl = ""; + let formattedApiRefModuleUrl = ""; + if (fields.isCommunity) { + formattedApiRefPackageUrl = API_REF_BASE_PACKAGE_URL.replace( + PACKAGE_NAME_PLACEHOLDER, + `community_llms_${fields.packageName}` + ); + formattedApiRefModuleUrl = API_REF_BASE_MODULE_URL.replace( + PACKAGE_NAME_PLACEHOLDER, + `community_llms_${fields.packageName}` + ).replace(MODULE_NAME_PLACEHOLDER, fields.moduleName); + } else { + formattedApiRefPackageUrl = API_REF_BASE_PACKAGE_URL.replace( + PACKAGE_NAME_PLACEHOLDER, + fields.packageName + ); + formattedApiRefModuleUrl = API_REF_BASE_MODULE_URL.replace( + PACKAGE_NAME_PLACEHOLDER, + fields.packageName + ).replace(MODULE_NAME_PLACEHOLDER, fields.moduleName); + } + + const success = await Promise.all([ + fetchAPIRefUrl(formattedApiRefPackageUrl), + fetchAPIRefUrl(formattedApiRefModuleUrl), + ]); + if (success.some((s) => s === false)) { + // Don't error out because this might be used before the package is released. + console.error("Invalid package or module name. 
API reference not found."); + } + + const packageNameShortSnakeCase = fields.packageName.replaceAll("-", "_"); + let fullPackageNameSnakeCase = ""; + let packageNamePretty = ""; + let fullPackageImportPath = ""; + + if (fields.isCommunity) { + fullPackageNameSnakeCase = `langchain_community_llms_${packageNameShortSnakeCase}`; + fullPackageImportPath = `@langchain/community/llms/${fields.packageName}`; + packageNamePretty = "@langchain/community"; + } else { + fullPackageNameSnakeCase = `langchain_${packageNameShortSnakeCase}`; + packageNamePretty = `@langchain/${fields.packageName}`; + fullPackageImportPath = packageNamePretty; + } + + let moduleNameAllCaps = fields.moduleName.toUpperCase(); + if (moduleNameAllCaps.endsWith("_LLM")) { + moduleNameAllCaps = moduleNameAllCaps.replace("_LLM", ""); + } else if (moduleNameAllCaps.endsWith("LLM")) { + moduleNameAllCaps = moduleNameAllCaps.replace("LLM", ""); + } + + const docTemplate = (await fs.promises.readFile(TEMPLATE_PATH, "utf-8")) + .replaceAll(PACKAGE_NAME_PLACEHOLDER, fields.packageName) + .replaceAll(PACKAGE_NAME_SNAKE_CASE_PLACEHOLDER, fullPackageNameSnakeCase) + .replaceAll( + PACKAGE_NAME_SHORT_SNAKE_CASE_PLACEHOLDER, + packageNameShortSnakeCase + ) + .replaceAll(PACKAGE_NAME_PRETTY_PLACEHOLDER, packageNamePretty) + .replaceAll(PACKAGE_IMPORT_PATH_PLACEHOLDER, fullPackageImportPath) + .replaceAll(MODULE_NAME_PLACEHOLDER, fields.moduleName) + .replaceAll(MODULE_NAME_ALL_CAPS_PLACEHOLDER, moduleNameAllCaps) + .replace(LOCAL_PLACEHOLDER, extraFields?.local ? "✅" : "❌") + .replace( + SERIALIZABLE_PLACEHOLDER, + extraFields?.serializable ? "✅" : "beta" + ) + .replace(PY_SUPPORT_PLACEHOLDER, extraFields?.pySupport ? 
"✅" : "❌"); + + const docPath = path.join( + INTEGRATIONS_DOCS_PATH, + `${packageNameShortSnakeCase}.ipynb` + ); + await fs.promises.writeFile(docPath, docTemplate); + const prettyDocPath = docPath.split("docs/core_docs/")[1]; + + const updatePythonDocUrlText = ` ${redBackground( + "- Update the Python documentation URL with the proper URL." + )}`; + const successText = `\nSuccessfully created new chat model integration doc at ${prettyDocPath}.`; + + console.log( + `${greenText(successText)}\n +${boldText("Next steps:")} +${extraFields?.pySupport ? updatePythonDocUrlText : ""} + - Run all code cells in the generated doc to record the outputs. + - Add extra sections on integration specific features.\n` + ); +} diff --git a/libs/langchain-scripts/src/cli/docs/retrievers.ts b/libs/langchain-scripts/src/cli/docs/retrievers.ts new file mode 100644 index 000000000000..fc68cb96d5e3 --- /dev/null +++ b/libs/langchain-scripts/src/cli/docs/retrievers.ts @@ -0,0 +1,181 @@ +import * as path from "node:path"; +import * as fs from "node:fs"; +import { + boldText, + getUserInput, + greenText, + redBackground, +} from "../utils/get-input.js"; + +const PACKAGE_NAME_PLACEHOLDER = "__package_name__"; +const MODULE_NAME_PLACEHOLDER = "__ModuleName__"; +const SIDEBAR_LABEL_PLACEHOLDER = "__sidebar_label__"; +const FULL_IMPORT_PATH_PLACEHOLDER = "__full_import_path__"; +const HAS_CLOUD_OFFERING_PLACEHOLDER = "__has_cloud_offering__"; +const CAN_SELF_HOST_PLACEHOLDER = "__can_self_host__"; +const PY_SUPPORT_PLACEHOLDER = "__py_support__"; +const API_REF_MODULE_PLACEHOLDER = "__api_ref_module__"; +const PYTHON_DOC_URL_PLACEHOLDER = "__python_doc_url__"; + +const fetchAPIRefUrl = async (url: string): Promise => { + try { + const res = await fetch(url); + if (res.status !== 200) { + throw new Error(`API Reference URL ${url} not found.`); + } + return true; + } catch (_) { + return false; + } +}; + +const TEMPLATE_PATH = path.resolve("./src/cli/docs/templates/retrievers.ipynb"); +const 
INTEGRATIONS_DOCS_PATH = path.resolve( + "../../docs/core_docs/docs/integrations/retrievers" +); + +type ExtraFields = { + packageName: string; + fullImportPath?: string; + hasCloudOffering: boolean; + canSelfHost: boolean; + pySupport: boolean; +}; + +async function promptExtraFields(fields: { + packageNameGuess: string; + isCommunity: boolean; +}): Promise { + const { packageNameGuess, isCommunity } = fields; + + const hasCloudOffering = await getUserInput( + "Does this retriever support self hosting? (y/n) ", + undefined, + true + ); + const canSelfHost = await getUserInput( + "Does this retriever have a cloud offering? (y/n) ", + undefined, + true + ); + const hasPySupport = await getUserInput( + "Does this integration have Python support? (y/n) ", + undefined, + true + ); + + let packageName = packageNameGuess; + if (!isCommunity) { + // If it's not community, get the package name. + + const isOtherPackageName = await getUserInput( + `Is this integration part of the ${packageNameGuess} package? (y/n) ` + ); + if (isOtherPackageName.toLowerCase() === "n") { + packageName = await getUserInput( + "What is the name of the package this integration is located in? (e.g @langchain/openai) ", + undefined, + true + ); + if ( + !packageName.startsWith("@langchain/") && + !packageName.startsWith("langchain/") + ) { + packageName = await getUserInput( + "Packages must start with either '@langchain/' or 'langchain/'. Please enter a valid package name: ", + undefined, + true + ); + } + } + } + + // If it's community or langchain, ask for the full import path + let fullImportPath: string | undefined; + if ( + packageName.startsWith("@langchain/community") || + packageName.startsWith("langchain/") + ) { + fullImportPath = await getUserInput( + "What is the full import path of the module? 
(e.g '@langchain/community/retrievers/tavily_search_api') ", + undefined, + true + ); + } + + return { + packageName, + fullImportPath, + canSelfHost: canSelfHost.toLowerCase() === "y", + hasCloudOffering: hasCloudOffering.toLowerCase() === "y", + pySupport: hasPySupport.toLowerCase() === "y", + }; +} + +export async function fillRetrieverIntegrationDocTemplate(fields: { + packageName: string; + moduleName: string; + isCommunity: boolean; +}) { + const sidebarLabel = fields.moduleName.replace("Retriever", ""); + const pyDocUrl = `https://python.langchain.com/v0.2/docs/integrations/retrievers/${sidebarLabel.toLowerCase()}/`; + const extraFields = await promptExtraFields({ + packageNameGuess: `@langchain/${fields.packageName}`, + isCommunity: fields.isCommunity, + }); + const { pySupport } = extraFields; + const { canSelfHost } = extraFields; + const { hasCloudOffering } = extraFields; + const { packageName } = extraFields; + const fullImportPath = extraFields.fullImportPath ?? extraFields.packageName; + + const apiRefModuleUrl = `https://api.js.langchain.com/classes/${fullImportPath + .replace("@", "") + .replaceAll("/", "_") + .replaceAll("-", "_")}.${fields.moduleName}.html`; + const apiRefPackageUrl = apiRefModuleUrl + .replace("/classes/", "/modules/") + .replace(`.${fields.moduleName}.html`, ".html"); + + const apiRefUrlSuccesses = await Promise.all([ + fetchAPIRefUrl(apiRefModuleUrl), + fetchAPIRefUrl(apiRefPackageUrl), + ]); + if (apiRefUrlSuccesses.find((s) => !s)) { + console.warn( + "API ref URLs invalid. Please manually ensure they are correct." + ); + } + + const docTemplate = (await fs.promises.readFile(TEMPLATE_PATH, "utf-8")) + .replaceAll(PACKAGE_NAME_PLACEHOLDER, packageName) + .replaceAll(MODULE_NAME_PLACEHOLDER, fields.moduleName) + .replaceAll(SIDEBAR_LABEL_PLACEHOLDER, sidebarLabel) + .replaceAll(FULL_IMPORT_PATH_PLACEHOLDER, fullImportPath) + .replace(HAS_CLOUD_OFFERING_PLACEHOLDER, hasCloudOffering ? 
"✅" : "❌") + .replace(CAN_SELF_HOST_PLACEHOLDER, canSelfHost ? "✅" : "❌") + .replace(PY_SUPPORT_PLACEHOLDER, pySupport ? "✅" : "❌") + .replaceAll(API_REF_MODULE_PLACEHOLDER, apiRefModuleUrl) + .replaceAll(PYTHON_DOC_URL_PLACEHOLDER, pyDocUrl); + + const packageNameShortSnakeCase = fields.packageName.replace(/-/g, "_"); + const docPath = path.join( + INTEGRATIONS_DOCS_PATH, + `${packageNameShortSnakeCase}.ipynb` + ); + await fs.promises.writeFile(docPath, docTemplate); + const prettyDocPath = docPath.split("docs/core_docs/")[1]; + + const updatePythonDocUrlText = ` ${redBackground( + "- Update the Python documentation URL with the proper URL." + )}`; + const successText = `\nSuccessfully created new chat model integration doc at ${prettyDocPath}.`; + + console.log( + `${greenText(successText)}\n +${boldText("Next steps:")} +${extraFields?.pySupport ? updatePythonDocUrlText : ""} + - Run all code cells in the generated doc to record the outputs. + - Add extra sections on integration specific features.\n` + ); +} diff --git a/libs/langchain-scripts/src/cli/docs/templates/document_loaders.ipynb b/libs/langchain-scripts/src/cli/docs/templates/document_loaders.ipynb index bc49c5c0801c..09781453dea8 100644 --- a/libs/langchain-scripts/src/cli/docs/templates/document_loaders.ipynb +++ b/libs/langchain-scripts/src/cli/docs/templates/document_loaders.ipynb @@ -1,11 +1,12 @@ { "cells": [ { - "cell_type": "markdown", + "cell_type": "raw", "metadata": {}, "source": [ "---\n", "sidebar_label: __ModuleName__\n", + "__node_only_sidebar__\n", "---" ] }, @@ -17,7 +18,9 @@ "\n", "- TODO: Make sure API reference link is correct.\n", "\n", - "This notebook provides a quick overview for getting started with [__ModuleName__](/docs/integrations/document_loaders/). 
For detailed documentation of all __ModuleName__ features and configurations head to the [API reference](https://api.js.langchain.com/classes/__package_name_snake_case__.__ModuleName__.html).\n", + "__node_only_tooltip__\n", + "\n", + "This notebook provides a quick overview for getting started with `__ModuleName__` [document loaders](/docs/concepts/#document-loaders). For detailed documentation of all `__ModuleName__` features and configurations head to the [API reference](https://api.js.langchain.com/classes/langchain_community_document_loaders___fs_or_web_____file_name__.__ModuleName__.html).\n", "\n", "- TODO: Add any other relevant links, like information about underlying API, etc.\n", "\n", @@ -28,13 +31,9 @@ "- TODO: Remove JS support link if not relevant, otherwise ensure link is correct.\n", "- TODO: Make sure API reference links are correct.\n", "\n", - "| Class | Package | Local | Serializable | [PY support](https://python.langchain.com/docs/integrations/document_loaders/__package_name_short_snake_case__)|\n", + "| Class | Package | Compatibility | Local | [PY support](https://python.langchain.com/docs/integrations/document_loaders/__file_name__)| \n", "| :--- | :--- | :---: | :---: | :---: |\n", - "| [__ModuleName__](https://api.js.langchain.com/classes/__package_name_snake_case__.__ModuleName__.html) | @langchain/community | __local__ | __serializable__ | __py_support__ | \n", - "### Loader features\n", - "| Source | Web Support | Node Support\n", - "| :---: | :---: | :---: | \n", - "| __ModuleName__ | __web_support__ | __fs_support__ | \n", + "| [__ModuleName__](https://api.js.langchain.com/classes/langchain_community_document_loaders___fs_or_web_____file_name__.__ModuleName__.html) | [@langchain/community](https://api.js.langchain.com/modules/langchain_community_document_loaders___fs_or_web_____file_name__.html) | __fs_support__ | __local__ | __py_support__ |\n", "\n", "## Setup\n", "\n", @@ -52,13 +51,6 @@ "export 
__MODULE_NAME_ALL_CAPS___API_KEY=\"your-api-key\"\n", "```\n", "\n", - "If you want to get automated tracing of your model calls you can also set your [LangSmith](https://docs.smith.langchain.com/) API key by uncommenting below:\n", - "\n", - "```bash\n", - "# export LANGCHAIN_TRACING_V2=\"true\"\n", - "# export LANGCHAIN_API_KEY=\"your-api-key\"\n", - "```\n", - "\n", "### Installation\n", "\n", "The LangChain __ModuleName__ integration lives in the `@langchain/community` package:\n", @@ -97,7 +89,7 @@ }, "outputs": [], "source": [ - "import { __ModuleName__ } from \"__import_path__\"\n", + "import { __ModuleName__ } from \"@langchain/community/document_loaders/__fs_or_web_import_path__/__file_name__\"\n", "\n", "const loader = new __ModuleName__({\n", " // required params = ...\n", @@ -156,7 +148,7 @@ "source": [ "## API reference\n", "\n", - "For detailed documentation of all __ModuleName__ features and configurations head to the API reference: https://api.js.langchain.com/classes/__package_name_snake_case__.__ModuleName__.html" + "For detailed documentation of all __ModuleName__ features and configurations head to the API reference: https://api.js.langchain.com/classes/langchain_community_document_loaders___fs_or_web_____file_name__.__ModuleName__.html" ] }, { diff --git a/libs/langchain-scripts/src/cli/docs/templates/llms.ipynb b/libs/langchain-scripts/src/cli/docs/templates/llms.ipynb new file mode 100644 index 000000000000..eac6a24b611d --- /dev/null +++ b/libs/langchain-scripts/src/cli/docs/templates/llms.ipynb @@ -0,0 +1,221 @@ +{ + "cells": [ + { + "cell_type": "raw", + "id": "67db2992", + "metadata": {}, + "source": [ + "---\n", + "sidebar_label: __ModuleName__\n", + "---" + ] + }, + { + "cell_type": "markdown", + "id": "9597802c", + "metadata": {}, + "source": [ + "# __ModuleName__\n", + "\n", + "- [ ] TODO: Make sure API reference link is correct\n", + "\n", + "This will help you get started with __ModuleName__ completion models (LLMs) using LangChain. 
For detailed documentation on `__ModuleName__` features and configuration options, please refer to the [API reference](https://api.js.langchain.com/classes/__package_name_snake_case__.__ModuleName__.html).\n", + "\n", + "## Overview\n", + "### Integration details\n", + "\n", + "- TODO: Fill in table features.\n", + "- TODO: Remove JS support link if not relevant, otherwise ensure link is correct.\n", + "- TODO: Make sure API reference links are correct.\n", + "\n", + "| Class | Package | Local | Serializable | [PY support](https://python.langchain.com/docs/integrations/llms/__package_name_short_snake_case__) | Package downloads | Package latest |\n", + "| :--- | :--- | :---: | :---: | :---: | :---: | :---: |\n", + "| [__ModuleName__](https://api.js.langchain.com/classes/__package_name_snake_case__.__ModuleName__.html) | [__package_name_pretty__](https://api.js.langchain.com/modules/__package_name_snake_case__.html) | __local__ | __serializable__ | __py_support__ | ![NPM - Downloads](https://img.shields.io/npm/dm/__package_name_pretty__?style=flat-square&label=%20&) | ![NPM - Version](https://img.shields.io/npm/v/__package_name_pretty__?style=flat-square&label=%20&) |\n", + "\n", + "## Setup\n", + "\n", + "- [ ] TODO: Update with relevant info.\n", + "\n", + "To access __ModuleName__ models you'll need to create a/an __ModuleName__ account, get an API key, and install the `__package_name_pretty__` integration package.\n", + "\n", + "### Credentials\n", + "\n", + "- TODO: Update with relevant info.\n", + "\n", + "Head to (TODO: link) to sign up to __ModuleName__ and generate an API key. 
Once you've done this set the `__MODULE_NAME_ALL_CAPS___API_KEY` environment variable:\n", + "\n", + "```bash\n", + "export __MODULE_NAME_ALL_CAPS___API_KEY=\"your-api-key\"\n", + "```\n", + "\n", + "If you want to get automated tracing of your model calls you can also set your [LangSmith](https://docs.smith.langchain.com/) API key by uncommenting below:\n", + "\n", + "```bash\n", + "# export LANGCHAIN_TRACING_V2=\"true\"\n", + "# export LANGCHAIN_API_KEY=\"your-api-key\"\n", + "```\n", + "\n", + "### Installation\n", + "\n", + "The LangChain __ModuleName__ integration lives in the `__package_name_pretty__` package:\n", + "\n", + "```{=mdx}\n", + "import IntegrationInstallTooltip from \"@mdx_components/integration_install_tooltip.mdx\";\n", + "import Npm2Yarn from \"@theme/Npm2Yarn\";\n", + "\n", + "\n", + "\n", + "\n", + " __package_name_pretty__\n", + "\n", + "\n", + "```" + ] + }, + { + "cell_type": "markdown", + "id": "0a760037", + "metadata": {}, + "source": [ + "## Instantiation\n", + "\n", + "Now we can instantiate our model object and generate chat completions:\n", + "\n", + "- TODO: Update model instantiation with relevant params." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "a0562a13", + "metadata": { + "vscode": { + "languageId": "typescript" + } + }, + "outputs": [], + "source": [ + "import { __ModuleName__ } from \"__import_path__\"\n", + "\n", + "const llm = new __ModuleName__({\n", + " model: \"model-name\",\n", + " temperature: 0,\n", + " maxTokens: undefined,\n", + " timeout: undefined,\n", + " maxRetries: 2,\n", + " // other params...\n", + "})" + ] + }, + { + "cell_type": "markdown", + "id": "0ee90032", + "metadata": {}, + "source": [ + "## Invocation\n", + "\n", + "- [ ] TODO: Run cells so output can be seen." 
+ ] + }, + { + "cell_type": "code", + "execution_count": 4, + "id": "035dea0f", + "metadata": { + "tags": [], + "vscode": { + "languageId": "typescript" + } + }, + "outputs": [], + "source": [ + "const inputText = \"__ModuleName__ is an AI company that \"\n", + "\n", + "const completion = await llm.invoke(inputText)\n", + "completion" + ] + }, + { + "cell_type": "markdown", + "id": "add38532", + "metadata": {}, + "source": [ + "## Chaining\n", + "\n", + "We can [chain](/docs/how_to/sequence/) our completion model with a prompt template like so:\n", + "\n", + "- TODO: Run cells so output can be seen." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "078e9db2", + "metadata": { + "vscode": { + "languageId": "typescript" + } + }, + "outputs": [], + "source": [ + "import { PromptTemplate } from \"@langchain/core/prompts\"\n", + "\n", + "const prompt = PromptTemplate.fromTemplate(\"How to say {input} in {output_language}:\\n\")\n", + "\n", + "const chain = prompt.pipe(llm);\n", + "await chain.invoke(\n", + " {\n", + " output_language: \"German\",\n", + " input: \"I love programming.\",\n", + " }\n", + ")" + ] + }, + { + "cell_type": "markdown", + "id": "e99eef30", + "metadata": {}, + "source": [ + "## TODO: Any functionality specific to this model provider\n", + "\n", + "E.g. creating/using finetuned models via this provider. 
Delete if not relevant" + ] + }, + { + "cell_type": "markdown", + "id": "e9bdfcef", + "metadata": {}, + "source": [ + "## API reference\n", + "\n", + "For detailed documentation of all __ModuleName__ features and configurations head to the API reference: https://api.js.langchain.com/classes/__package_name_snake_case__.__ModuleName__.html" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3.11.1 64-bit", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.9.7" + }, + "vscode": { + "interpreter": { + "hash": "e971737741ff4ec9aff7dc6155a1060a59a8a6d52c757dbbe66bf8ee389494b1" + } + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} diff --git a/libs/langchain-scripts/src/cli/docs/templates/retrievers.ipynb b/libs/langchain-scripts/src/cli/docs/templates/retrievers.ipynb new file mode 100644 index 000000000000..236fec3abfc4 --- /dev/null +++ b/libs/langchain-scripts/src/cli/docs/templates/retrievers.ipynb @@ -0,0 +1,242 @@ +{ + "cells": [ + { + "cell_type": "raw", + "id": "afaf8039", + "metadata": { + "vscode": { + "languageId": "raw" + } + }, + "source": [ + "---\n", + "sidebar_label: __sidebar_label__\n", + "---" + ] + }, + { + "cell_type": "markdown", + "id": "e49f1e0d", + "metadata": {}, + "source": [ + "# __ModuleName__\n", + "\n", + "## Overview\n", + "\n", + "- TODO: Make sure API reference link is correct.\n", + "\n", + "This will help you get started with the [__ModuleName__](/docs/concepts/#retrievers). 
For detailed documentation of all __ModuleName__ features and configurations head to the [API reference](__api_ref_module__).\n", + "\n", + "### Integration details\n", + "\n", + "TODO: Select one of the tables below, as appropriate.\n", + "\n", + "1: Bring-your-own data (i.e., index and search a custom corpus of documents):\n", + "\n", + "| Retriever | Self-host | Cloud offering | Package | [Py support](__python_doc_url__) |\n", + "| :--- | :--- | :---: | :---: | :---: |\n", + "[__ModuleName__](__api_ref_module__) | __can_self_host__ | __has_cloud_offering__ | __package_name__ | __py_support__ |\n", + "\n", + "2: External index (e.g., constructed from Internet data or similar):\n", + "\n", + "| Retriever | Source | Package |\n", + "| :--- | :--- | :---: |\n", + "[__ModuleName__](__api_ref_module__) | Source description | __package_name__ |\n", + "\n", + "## Setup\n", + "\n", + "- TODO: Update with relevant info.\n", + "\n", + "If you want to get automated tracing from individual queries, you can also set your [LangSmith](https://docs.smith.langchain.com/) API key by uncommenting below:\n", + "\n", + "```typescript\n", + "// process.env.LANGSMITH_API_KEY = \"\";\n", + "// process.env.LANGSMITH_TRACING = \"true\";\n", + "```\n", + "\n", + "### Installation\n", + "\n", + "This retriever lives in the `__package_name__` package:\n", + "\n", + "```{=mdx}\n", + "import IntegrationInstallTooltip from \"@mdx_components/integration_install_tooltip.mdx\";\n", + "import Npm2Yarn from \"@theme/Npm2Yarn\";\n", + "\n", + "\n", + "\n", + "\n", + " __package_name__\n", + "\n", + "```" + ] + }, + { + "cell_type": "markdown", + "id": "a38cde65-254d-4219-a441-068766c0d4b5", + "metadata": {}, + "source": [ + "## Instantiation\n", + "\n", + "Now we can instantiate our retriever:\n", + "\n", + "- TODO: Update model instantiation with relevant params." 
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "70cc8e65-2a02-408a-bbc6-8ef649057d82", + "metadata": {}, + "outputs": [], + "source": [ + "import { __ModuleName__ } from \"__full_import_path__\";\n", + "\n", + "const retriever = new __ModuleName__(\n", + " // ...\n", + ");" + ] + }, + { + "cell_type": "markdown", + "id": "5c5f2839-4020-424e-9fc9-07777eede442", + "metadata": {}, + "source": [ + "## Usage" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "51a60dbe-9f2e-4e04-bb62-23968f17164a", + "metadata": {}, + "outputs": [], + "source": [ + "const query = \"...\"\n", + "\n", + "await retriever.invoke(query);" + ] + }, + { + "cell_type": "markdown", + "id": "dfe8aad4-8626-4330-98a9-7ea1ca5d2e0e", + "metadata": {}, + "source": [ + "## Use within a chain\n", + "\n", + "Like other retrievers, __ModuleName__ can be incorporated into LLM applications via [chains](/docs/how_to/sequence/).\n", + "\n", + "We will need a LLM or chat model:\n", + "\n", + "```{=mdx}\n", + "import ChatModelTabs from \"@theme/ChatModelTabs\";\n", + "\n", + "\n", + "```" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "25b647a3-f8f2-4541-a289-7a241e43f9df", + "metadata": {}, + "outputs": [], + "source": [ + "// @ls-docs-hide-cell\n", + "\n", + "import { ChatOpenAI } from \"@langchain/openai\";\n", + "\n", + "const llm = new ChatOpenAI({\n", + " model: \"gpt-4o-mini\",\n", + " temperature: 0,\n", + "});" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "23e11cc9-abd6-4855-a7eb-799f45ca01ae", + "metadata": {}, + "outputs": [], + "source": [ + "import { ChatPromptTemplate } from \"@langchain/core/prompts\";\n", + "import { RunnablePassthrough, RunnableSequence } from \"@langchain/core/runnables\";\n", + "import { StringOutputParser } from \"@langchain/core/output_parsers\";\n", + "\n", + "import type { Document } from \"@langchain/core/documents\";\n", + "\n", + "const prompt = 
ChatPromptTemplate.fromTemplate(`\n", + "Answer the question based only on the context provided.\n", + "\n", + "Context: {context}\n", + "\n", + "Question: {question}`);\n", + "\n", + "const formatDocs = (docs: Document[]) => {\n", + " return docs.map((doc) => doc.pageContent).join(\"\\n\\n\");\n", + "}\n", + "\n", + "// See https://js.langchain.com/v0.2/docs/tutorials/rag\n", + "const ragChain = RunnableSequence.from([\n", + " {\n", + " context: retriever.pipe(formatDocs),\n", + " question: new RunnablePassthrough(),\n", + " },\n", + " prompt,\n", + " llm,\n", + " new StringOutputParser(),\n", + "]);" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "d47c37dd-5c11-416c-a3b6-bec413cd70e8", + "metadata": {}, + "outputs": [], + "source": [ + "await ragChain.invoke(\"...\")" + ] + }, + { + "cell_type": "markdown", + "id": "d1ee55bc-ffc8-4cfa-801c-993953a08cfd", + "metadata": {}, + "source": [ + "## TODO: Any functionality or considerations specific to this retriever\n", + "\n", + "Fill in or delete if not relevant." + ] + }, + { + "cell_type": "markdown", + "id": "3a5bb5ca-c3ae-4a58-be67-2cd18574b9a3", + "metadata": {}, + "source": [ + "## API reference\n", + "\n", + "For detailed documentation of all __ModuleName__ features and configurations head to the [API reference](__api_ref_module__)." 
+ ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "TypeScript", + "language": "typescript", + "name": "tslab" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "typescript", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.10.4" + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} diff --git a/libs/langchain-scripts/src/cli/docs/templates/text_embedding.ipynb b/libs/langchain-scripts/src/cli/docs/templates/text_embedding.ipynb new file mode 100644 index 000000000000..48d85f8a3afc --- /dev/null +++ b/libs/langchain-scripts/src/cli/docs/templates/text_embedding.ipynb @@ -0,0 +1,228 @@ +{ + "cells": [ + { + "cell_type": "raw", + "id": "afaf8039", + "metadata": { + "vscode": { + "languageId": "raw" + } + }, + "source": [ + "---\n", + "sidebar_label: __sidebar_label__\n", + "---" + ] + }, + { + "cell_type": "markdown", + "id": "9a3d6f34", + "metadata": {}, + "source": [ + "# __ModuleName__\n", + "\n", + "- [ ] TODO: Make sure API reference link is correct\n", + "\n", + "This will help you get started with __ModuleName__ [embedding models](/docs/concepts#embedding-models) using LangChain. 
For detailed documentation on `__ModuleName__` features and configuration options, please refer to the [API reference](__api_ref_module__).\n", + "\n", + "## Overview\n", + "### Integration details\n", + "\n", + "- TODO: Fill in table features.\n", + "- TODO: Remove JS support link if not relevant, otherwise ensure link is correct.\n", + "- TODO: Make sure API reference links are correct.\n", + "\n", + "| Class | Package | Local | [Py support](__python_doc_url__) | Package downloads | Package latest |\n", + "| :--- | :--- | :---: | :---: | :---: | :---: |\n", + "| [__ModuleName__](__api_ref_module__) | [__package_name__](__api_ref_package__) | __local__ | __py_support__ | ![NPM - Downloads](https://img.shields.io/npm/dm/__package_name__?style=flat-square&label=%20&) | ![NPM - Version](https://img.shields.io/npm/v/__package_name__?style=flat-square&label=%20&) |\n", + "\n", + "## Setup\n", + "\n", + "- [ ] TODO: Update with relevant info.\n", + "\n", + "To access __sidebar_label__ embedding models you'll need to create a/an __ModuleName__ account, get an API key, and install the `__package_name__` integration package.\n", + "\n", + "### Credentials\n", + "\n", + "- TODO: Update with relevant info.\n", + "\n", + "Head to (TODO: link) to sign up to `__sidebar_label__` and generate an API key. 
Once you've done this set the `__env_var_name__` environment variable:\n", + "\n", + "```bash\n", + "export __env_var_name__=\"your-api-key\"\n", + "```\n", + "\n", + "If you want to get automated tracing of your model calls you can also set your [LangSmith](https://docs.smith.langchain.com/) API key by uncommenting below:\n", + "\n", + "```bash\n", + "# export LANGCHAIN_TRACING_V2=\"true\"\n", + "# export LANGCHAIN_API_KEY=\"your-api-key\"\n", + "```\n", + "\n", + "### Installation\n", + "\n", + "The LangChain __ModuleName__ integration lives in the `__package_name__` package:\n", + "\n", + "```{=mdx}\n", + "import IntegrationInstallTooltip from \"@mdx_components/integration_install_tooltip.mdx\";\n", + "import Npm2Yarn from \"@theme/Npm2Yarn\";\n", + "\n", + "\n", + "\n", + "\n", + " __package_name__\n", + "\n", + "```" + ] + }, + { + "cell_type": "markdown", + "id": "45dd1724", + "metadata": {}, + "source": [ + "## Instantiation\n", + "\n", + "Now we can instantiate our model object and generate embeddings:\n", + "\n", + "- TODO: Update model instantiation with relevant params." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "9ea7a09b", + "metadata": {}, + "outputs": [], + "source": [ + "import { __ModuleName__ } from \"__full_import_path__\";\n", + "\n", + "const embeddings = new __ModuleName__({\n", + " model: \"model-name\",\n", + " // ...\n", + "});" + ] + }, + { + "cell_type": "markdown", + "id": "77d271b6", + "metadata": {}, + "source": [ + "## Indexing and Retrieval\n", + "\n", + "Embedding models are often used in retrieval-augmented generation (RAG) flows, both as part of indexing data as well as later retrieving it. For more detailed instructions, please see our RAG tutorials under the [working with external knowledge tutorials](/docs/tutorials/#working-with-external-knowledge).\n", + "\n", + "Below, see how to index and retrieve data using the `embeddings` object we initialized above. 
In this example, we will index and retrieve a sample document using the demo [`MemoryVectorStore`](/docs/integrations/vectorstores/memory)." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "d817716b", + "metadata": {}, + "outputs": [], + "source": [ + "// Create a vector store with a sample text\n", + "import { MemoryVectorStore } from \"langchain/vectorstores/memory\";\n", + "\n", + "const text = \"LangChain is the framework for building context-aware reasoning applications\";\n", + "\n", + "const vectorstore = await MemoryVectorStore.fromDocuments(\n", + " [{ pageContent: text, metadata: {} }],\n", + " embeddings,\n", + ");\n", + "\n", + "// Use the vector store as a retriever that returns a single document\n", + "const retriever = vectorstore.asRetriever(1);\n", + "\n", + "// Retrieve the most similar text\n", + "const retrievedDocument = await retriever.invoke(\"What is LangChain?\");\n", + "\n", + "retrievedDocument.pageContent;" + ] + }, + { + "cell_type": "markdown", + "id": "e02b9855", + "metadata": {}, + "source": [ + "## Direct Usage\n", + "\n", + "Under the hood, the vectorstore and retriever implementations are calling `embeddings.embedDocument(...)` and `embeddings.embedQuery(...)` to create embeddings for the text(s) used in `fromDocuments` and the retriever's `invoke` operations, respectively.\n", + "\n", + "You can directly call these methods to get embeddings for your own use cases.\n", + "\n", + "### Embed single texts\n", + "\n", + "You can embed queries for search with `embedQuery`. 
This generates a vector representation specific to the query:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "0d2befcd", + "metadata": {}, + "outputs": [], + "source": [ + "const singleVector = await embeddings.embedQuery(text);\n", + "\n", + "console.log(singleVector.slice(0, 100));" + ] + }, + { + "cell_type": "markdown", + "id": "1b5a7d03", + "metadata": {}, + "source": [ + "### Embed multiple texts\n", + "\n", + "You can embed multiple texts for indexing with `embedDocuments`. The internals used for this method may (but do not have to) differ from embedding queries:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "2f4d6e97", + "metadata": {}, + "outputs": [], + "source": [ + "const text2 = \"LangGraph is a library for building stateful, multi-actor applications with LLMs\";\n", + "\n", + "const vectors = await embeddings.embedDocuments([text, text2]);\n", + "\n", + "console.log(vectors[0].slice(0, 100));\n", + "console.log(vectors[1].slice(0, 100));" + ] + }, + { + "cell_type": "markdown", + "id": "8938e581", + "metadata": {}, + "source": [ + "## API reference\n", + "\n", + "For detailed documentation of all __ModuleName__ features and configurations head to the API reference: __api_ref_module__" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "TypeScript", + "language": "typescript", + "name": "tslab" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "typescript", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.10.5" + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} diff --git a/yarn.lock b/yarn.lock index f2c95da3ec90..131b835c6732 100644 --- a/yarn.lock +++ b/yarn.lock @@ -10994,7 +10994,6 @@ __metadata: eslint-plugin-prettier: ^4.2.1 jest: ^29.5.0 jest-environment-node: ^29.6.4 - langchain: 0.2.5 prettier: ^2.8.3 release-it: ^15.10.1 rollup: ^4.5.2 @@ 
-11654,7 +11653,6 @@ __metadata: jest-environment-node: ^29.6.4 js-tiktoken: ^1.0.12 langsmith: ~0.1.39 - ml-distance: ^4.0.0 ml-matrix: ^6.10.4 mustache: ^4.2.0 p-queue: ^6.6.2 @@ -21134,13 +21132,6 @@ __metadata: languageName: node linkType: hard -"binary-search@npm:^1.3.5": - version: 1.3.6 - resolution: "binary-search@npm:1.3.6" - checksum: 2e6b3459a9c1ba1bd674a6a855a5ef7505f70707422244430e3510e989c0df6074a49fe60784a98b93b51545c9bcace1db1defee06ff861b124c036a2f2836bf - languageName: node - linkType: hard - "bindings@npm:^1.5.0": version: 1.5.0 resolution: "bindings@npm:1.5.0" @@ -26304,7 +26295,6 @@ __metadata: js-yaml: ^4.1.0 langchain: "workspace:*" langsmith: ^0.1.30 - ml-distance: ^4.0.0 mongodb: ^6.3.0 pg: ^8.11.0 pickleparser: ^0.2.1 @@ -31293,181 +31283,6 @@ __metadata: languageName: node linkType: hard -"langchain@npm:0.2.5": - version: 0.2.5 - resolution: "langchain@npm:0.2.5" - dependencies: - "@langchain/core": ~0.2.0 - "@langchain/openai": ~0.1.0 - "@langchain/textsplitters": ~0.0.0 - binary-extensions: ^2.2.0 - js-tiktoken: ^1.0.12 - js-yaml: ^4.1.0 - jsonpointer: ^5.0.1 - langchainhub: ~0.0.8 - langsmith: ~0.1.30 - ml-distance: ^4.0.0 - openapi-types: ^12.1.3 - p-retry: 4 - uuid: ^9.0.0 - yaml: ^2.2.1 - zod: ^3.22.4 - zod-to-json-schema: ^3.22.3 - peerDependencies: - "@aws-sdk/client-s3": ^3.310.0 - "@aws-sdk/client-sagemaker-runtime": ^3.310.0 - "@aws-sdk/client-sfn": ^3.310.0 - "@aws-sdk/credential-provider-node": ^3.388.0 - "@azure/storage-blob": ^12.15.0 - "@browserbasehq/sdk": "*" - "@gomomento/sdk": ^1.51.1 - "@gomomento/sdk-core": ^1.51.1 - "@gomomento/sdk-web": ^1.51.1 - "@mendable/firecrawl-js": ^0.0.13 - "@notionhq/client": ^2.2.10 - "@pinecone-database/pinecone": "*" - "@supabase/supabase-js": ^2.10.0 - "@vercel/kv": ^0.2.3 - "@xata.io/client": ^0.28.0 - apify-client: ^2.7.1 - assemblyai: ^4.0.0 - axios: "*" - cheerio: ^1.0.0-rc.12 - chromadb: "*" - convex: ^1.3.1 - couchbase: ^4.3.0 - d3-dsv: ^2.0.0 - epub2: ^3.0.1 - fast-xml-parser: 
"*" - handlebars: ^4.7.8 - html-to-text: ^9.0.5 - ignore: ^5.2.0 - ioredis: ^5.3.2 - jsdom: "*" - mammoth: ^1.6.0 - mongodb: ">=5.2.0" - node-llama-cpp: "*" - notion-to-md: ^3.1.0 - officeparser: ^4.0.4 - pdf-parse: 1.1.1 - peggy: ^3.0.2 - playwright: ^1.32.1 - puppeteer: ^19.7.2 - pyodide: ^0.24.1 - redis: ^4.6.4 - sonix-speech-recognition: ^2.1.1 - srt-parser-2: ^1.2.3 - typeorm: ^0.3.12 - weaviate-ts-client: "*" - web-auth-library: ^1.0.3 - ws: ^8.14.2 - youtube-transcript: ^1.0.6 - youtubei.js: ^9.1.0 - peerDependenciesMeta: - "@aws-sdk/client-s3": - optional: true - "@aws-sdk/client-sagemaker-runtime": - optional: true - "@aws-sdk/client-sfn": - optional: true - "@aws-sdk/credential-provider-node": - optional: true - "@azure/storage-blob": - optional: true - "@browserbasehq/sdk": - optional: true - "@gomomento/sdk": - optional: true - "@gomomento/sdk-core": - optional: true - "@gomomento/sdk-web": - optional: true - "@mendable/firecrawl-js": - optional: true - "@notionhq/client": - optional: true - "@pinecone-database/pinecone": - optional: true - "@supabase/supabase-js": - optional: true - "@vercel/kv": - optional: true - "@xata.io/client": - optional: true - apify-client: - optional: true - assemblyai: - optional: true - axios: - optional: true - cheerio: - optional: true - chromadb: - optional: true - convex: - optional: true - couchbase: - optional: true - d3-dsv: - optional: true - epub2: - optional: true - faiss-node: - optional: true - fast-xml-parser: - optional: true - handlebars: - optional: true - html-to-text: - optional: true - ignore: - optional: true - ioredis: - optional: true - jsdom: - optional: true - mammoth: - optional: true - mongodb: - optional: true - node-llama-cpp: - optional: true - notion-to-md: - optional: true - officeparser: - optional: true - pdf-parse: - optional: true - peggy: - optional: true - playwright: - optional: true - puppeteer: - optional: true - pyodide: - optional: true - redis: - optional: true - 
sonix-speech-recognition: - optional: true - srt-parser-2: - optional: true - typeorm: - optional: true - weaviate-ts-client: - optional: true - web-auth-library: - optional: true - ws: - optional: true - youtube-transcript: - optional: true - youtubei.js: - optional: true - checksum: 18078968f7a788052d02cc681e1a58d85ae7d8461db039a9e53ea7f8a17057c76a186788fceed0b0d2d02be732afa519305f16f12df57e90e734159cf7518df0 - languageName: node - linkType: hard - "langchain@workspace:*, langchain@workspace:langchain, langchain@~0.2.3": version: 0.0.0-use.local resolution: "langchain@workspace:langchain" @@ -31545,10 +31360,8 @@ __metadata: js-yaml: ^4.1.0 jsdom: ^22.1.0 jsonpointer: ^5.0.1 - langchainhub: ~0.0.8 - langsmith: ~0.1.30 + langsmith: ~0.1.40 mammoth: ^1.5.1 - ml-distance: ^4.0.0 mongodb: ^5.2.0 node-llama-cpp: 2.7.3 notion-to-md: ^3.1.0 @@ -31761,13 +31574,6 @@ __metadata: languageName: unknown linkType: soft -"langchainhub@npm:~0.0.8": - version: 0.0.8 - resolution: "langchainhub@npm:0.0.8" - checksum: b46316adbbd5f1971892b423e6a7e9c7681f4c44e4ac3c3b79c6beef96a28fc9582a4ee14affb617fd887f3dac8cae55368e1b7c4a41bb43f86f17c5d63031e3 - languageName: node - linkType: hard - "langchainjs@workspace:.": version: 0.0.0-use.local resolution: "langchainjs@workspace:." 
@@ -31809,6 +31615,31 @@ __metadata: languageName: node linkType: hard +"langsmith@npm:~0.1.40": + version: 0.1.40 + resolution: "langsmith@npm:0.1.40" + dependencies: + "@types/uuid": ^9.0.1 + commander: ^10.0.1 + p-queue: ^6.6.2 + p-retry: 4 + semver: ^7.6.3 + uuid: ^9.0.0 + peerDependencies: + "@langchain/core": "*" + langchain: "*" + openai: "*" + peerDependenciesMeta: + "@langchain/core": + optional: true + langchain: + optional: true + openai: + optional: true + checksum: 8c5bcf5137e93a9a17203fbe21d6a61f45c98fccafc2040d56e9cc15a4ee432456d986adf0e590d8c436b72d18143053ce6e65f021115f1596dd4519ec2805d7 + languageName: node + linkType: hard + "language-subtag-registry@npm:^0.3.20, language-subtag-registry@npm:~0.3.2": version: 0.3.22 resolution: "language-subtag-registry@npm:0.3.22" @@ -33204,15 +33035,6 @@ __metadata: languageName: node linkType: hard -"ml-array-mean@npm:^1.1.6": - version: 1.1.6 - resolution: "ml-array-mean@npm:1.1.6" - dependencies: - ml-array-sum: ^1.1.6 - checksum: 81999dac8bad3bf2dafb23a9bc71883879b9d55889e48d00b91dd4a2568957a6f5373632ae57324760d1e1d7d29ad45ab4ea7ae32de67ce144d57a21e36dd9c2 - languageName: node - linkType: hard - "ml-array-min@npm:^1.2.3": version: 1.2.3 resolution: "ml-array-min@npm:1.2.3" @@ -33233,33 +33055,6 @@ __metadata: languageName: node linkType: hard -"ml-array-sum@npm:^1.1.6": - version: 1.1.6 - resolution: "ml-array-sum@npm:1.1.6" - dependencies: - is-any-array: ^2.0.0 - checksum: 369dbb3681e3f8b0d0facba9fcfc981656dac49a80924859c3ed8f0a5880fb6db2d6e534f8b7b9c3cda59248152e61b27d6419d19c69539de7c3aa6aea3094eb - languageName: node - linkType: hard - -"ml-distance-euclidean@npm:^2.0.0": - version: 2.0.0 - resolution: "ml-distance-euclidean@npm:2.0.0" - checksum: e31f98a947ce6971c35d74e6d2521800f0d219efb34c78b20b5f52debd206008d52e677685c09839e6bab5d2ed233aa009314236e4e548d5fafb60f2f71e2b3e - languageName: node - linkType: hard - -"ml-distance@npm:^4.0.0": - version: 4.0.0 - resolution: "ml-distance@npm:4.0.0" - 
dependencies: - ml-array-mean: ^1.1.6 - ml-distance-euclidean: ^2.0.0 - ml-tree-similarity: ^1.0.0 - checksum: 37d07d52c5f1d185833336d73764e97dcdf8abe37e276fd6a3f7d536cf373e7cbfd6497f433192bd7d100b0b95518a616a8ae6df53b74c5b0d5fbfa7a971ae4d - languageName: node - linkType: hard - "ml-matrix@npm:^6.10.4": version: 6.10.4 resolution: "ml-matrix@npm:6.10.4" @@ -33270,16 +33065,6 @@ __metadata: languageName: node linkType: hard -"ml-tree-similarity@npm:^1.0.0": - version: 1.0.0 - resolution: "ml-tree-similarity@npm:1.0.0" - dependencies: - binary-search: ^1.3.5 - num-sort: ^2.0.0 - checksum: f99e217dc94acf75c089469dc3c278f388146e43c82212160b6b75daa14309902f84eb0a00c67d502fc79dc171cf15a33d392326e024b2e89881adc585d15513 - languageName: node - linkType: hard - "mnemonist@npm:0.38.3": version: 0.38.3 resolution: "mnemonist@npm:0.38.3" @@ -34063,13 +33848,6 @@ __metadata: languageName: node linkType: hard -"num-sort@npm:^2.0.0": - version: 2.1.0 - resolution: "num-sort@npm:2.1.0" - checksum: 5a80cd0456c8847f71fb80ad3c3596714cebede76de585aa4fed2b9a4fb0907631edca1f7bb31c24dbb9928b66db3d03059994cc365d2ae011b80ddddac28f6e - languageName: node - linkType: hard - "nwsapi@npm:^2.2.4": version: 2.2.7 resolution: "nwsapi@npm:2.2.7"