From cb4e6ac941de44de2c43b94e5b66435e548a0e75 Mon Sep 17 00:00:00 2001
From: Erick Friis
Date: Sat, 21 Dec 2024 09:38:31 -0800
Subject: [PATCH 1/7] docs: frontmatter gen, colab/github links (#28852)

---
 docs/scripts/notebook_convert.py            | 22 +++--
 docs/src/theme/DocItem/Layout/index.js      | 85 +++++++++++++++++++
 .../theme/DocItem/Layout/styles.module.css  | 10 +++
 3 files changed, 109 insertions(+), 8 deletions(-)
 create mode 100644 docs/src/theme/DocItem/Layout/index.js
 create mode 100644 docs/src/theme/DocItem/Layout/styles.module.css

diff --git a/docs/scripts/notebook_convert.py b/docs/scripts/notebook_convert.py
index 429734f115817..fb0e3c807561d 100644
--- a/docs/scripts/notebook_convert.py
+++ b/docs/scripts/notebook_convert.py
@@ -143,16 +143,22 @@ def _modify_frontmatter(
     edit_url = (
         f"https://github.com/langchain-ai/langchain/edit/master/docs/docs/{rel_path}"
     )
+    frontmatter = {
+        "custom_edit_url": edit_url,
+    }
     if re.match(r"^[\s\n]*---\n", body):
-        # if custom_edit_url already exists, leave it
-        if re.match(r"custom_edit_url: ", body):
-            return body
-        else:
-            return re.sub(
-                r"^[\s\n]*---\n", f"---\ncustom_edit_url: {edit_url}\n", body, count=1
-            )
+        # frontmatter already present
+
+        for k, v in frontmatter.items():
+            # if key already exists, leave it
+            if re.match(f"{k}: ", body):
+                continue
+            else:
+                body = re.sub(r"^[\s\n]*---\n", f"---\n{k}: {v}\n", body, count=1)
+        return body
     else:
-        return f"---\ncustom_edit_url: {edit_url}\n---\n{body}"
+        insert = "\n".join([f"{k}: {v}" for k, v in frontmatter.items()])
+        return f"---\n{insert}\n---\n{body}"
 
 
 def _convert_notebook(
diff --git a/docs/src/theme/DocItem/Layout/index.js b/docs/src/theme/DocItem/Layout/index.js
new file mode 100644
index 0000000000000..768e65112d284
--- /dev/null
+++ b/docs/src/theme/DocItem/Layout/index.js
@@ -0,0 +1,85 @@
+import React from 'react';
+import clsx from 'clsx';
+import {useWindowSize} from '@docusaurus/theme-common';
+import {useDoc} from '@docusaurus/plugin-content-docs/client';
+import DocItemPaginator from '@theme/DocItem/Paginator';
+import DocVersionBanner from '@theme/DocVersionBanner';
+import DocVersionBadge from '@theme/DocVersionBadge';
+import DocItemFooter from '@theme/DocItem/Footer';
+import DocItemTOCMobile from '@theme/DocItem/TOC/Mobile';
+import DocItemTOCDesktop from '@theme/DocItem/TOC/Desktop';
+import DocItemContent from '@theme/DocItem/Content';
+import DocBreadcrumbs from '@theme/DocBreadcrumbs';
+import ContentVisibility from '@theme/ContentVisibility';
+import styles from './styles.module.css';
+/**
+ * Decide if the toc should be rendered, on mobile or desktop viewports
+ */
+function useDocTOC() {
+  const {frontMatter, toc} = useDoc();
+  const windowSize = useWindowSize();
+  const hidden = frontMatter.hide_table_of_contents;
+  const canRender = !hidden && toc.length > 0;
+  const mobile = canRender ? <DocItemTOCMobile /> : undefined;
+  const desktop =
+    canRender && (windowSize === 'desktop' || windowSize === 'ssr') ? (
+      <DocItemTOCDesktop />
+    ) : undefined;
+  return {
+    hidden,
+    mobile,
+    desktop,
+  };
+}
+export default function DocItemLayout({children}) {
+  const docTOC = useDocTOC();
+  const {metadata, frontMatter} = useDoc();
+
+  "https://github.com/langchain-ai/langchain/blob/master/docs/docs/introduction.ipynb"
+  "https://colab.research.google.com/github/langchain-ai/langchain/blob/master/docs/docs/introduction.ipynb"
+
+  console.log({metadata, frontMatter})
+
+  const linkColab = frontMatter.link_colab || (
+    metadata.editUrl?.endsWith(".ipynb")
+      ? metadata.editUrl?.replace("https://github.com/langchain-ai/langchain/edit/", "https://colab.research.google.com/github/langchain-ai/langchain/blob/")
+      : null
+  );
+  const linkGithub = frontMatter.link_github || metadata.editUrl?.replace("/edit/", "/blob/");
+
+  console.log({linkColab, linkGithub})
+
+  return (
+    <div className="row">
+      <div className={clsx('col', !docTOC.hidden && styles.docItemCol)}>
+        <ContentVisibility metadata={metadata} />
+        <DocVersionBanner />
+        <div className={styles.docItemContainer}>
+          <article>
+            <DocBreadcrumbs />
+            <DocVersionBadge />
+            {docTOC.mobile}
+            <div style={{display: "flex", gap: "10px", marginBottom: "10px"}}>
+              {linkColab && (
+                <a href={linkColab} target="_blank"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab" /></a>
+              )}
+              {linkGithub && (
+                <a href={linkGithub} target="_blank"><img src="https://img.shields.io/badge/Open%20on%20GitHub-grey?logo=github" alt="Open on GitHub" /></a>
+              )}
+            </div>
+            <DocItemContent>{children}</DocItemContent>
+            <DocItemFooter />
+          </article>
+          <DocItemPaginator />
+        </div>
+      </div>
+      {docTOC.desktop && <div className="col col--3">{docTOC.desktop}</div>}
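Editor's note: to see the frontmatter handling from `notebook_convert.py` above in isolation, here is a minimal standalone sketch of the same logic; the `frontmatter` dict value and the sample bodies are illustrative stand-ins, not part of the patch. Note that `re.match` anchors at the beginning of the string, so the existing-key check only triggers when the key sits on the very first line of `body`.

```python
import re

# Illustrative stand-in for the dict that _modify_frontmatter builds from edit_url.
frontmatter = {"custom_edit_url": "https://example.org/edit/doc.ipynb"}

def inject(body: str) -> str:
    if re.match(r"^[\s\n]*---\n", body):
        # Frontmatter block already present: prepend any keys that are missing.
        for k, v in frontmatter.items():
            if re.match(f"{k}: ", body):
                continue
            body = re.sub(r"^[\s\n]*---\n", f"---\n{k}: {v}\n", body, count=1)
        return body
    # No frontmatter yet: wrap the new keys in a fresh --- block.
    insert = "\n".join([f"{k}: {v}" for k, v in frontmatter.items()])
    return f"---\n{insert}\n---\n{body}"

print(inject("# A notebook without frontmatter\n"))
print(inject("---\ntitle: Intro\n---\n# A notebook with frontmatter\n"))
```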
+  );
+}
diff --git a/docs/src/theme/DocItem/Layout/styles.module.css b/docs/src/theme/DocItem/Layout/styles.module.css
new file mode 100644
index 0000000000000..d5aaec1322c92
--- /dev/null
+++ b/docs/src/theme/DocItem/Layout/styles.module.css
@@ -0,0 +1,10 @@
+.docItemContainer header + *,
+.docItemContainer article > *:first-child {
+  margin-top: 0;
+}
+
+@media (min-width: 997px) {
+  .docItemCol {
+    max-width: 75% !important;
+  }
+}

From 32917a0b98cb8edcfb8d0e84f0878434e1c3f192 Mon Sep 17 00:00:00 2001
From: Darien Schettler <50381286+darien-schettler@users.noreply.github.com>
Date: Sun, 22 Dec 2024 19:16:16 -0500
Subject: [PATCH 2/7] Update dataframe.py (#28871)

community: optimize DataFrame document loader

**Description:** Simplify the `lazy_load` method in the DataFrame document
loader by combining text extraction and metadata cleanup into a single
operation. This makes the code more concise while maintaining the same
functionality.

**Issue:** N/A

**Dependencies:** None

**Twitter handle:** N/A
---
 .../langchain_community/document_loaders/dataframe.py | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/libs/community/langchain_community/document_loaders/dataframe.py b/libs/community/langchain_community/document_loaders/dataframe.py
index 1b508533f8d93..74ad56b53f783 100644
--- a/libs/community/langchain_community/document_loaders/dataframe.py
+++ b/libs/community/langchain_community/document_loaders/dataframe.py
@@ -21,9 +21,8 @@ def lazy_load(self) -> Iterator[Document]:
         """Lazy load records from dataframe."""
 
         for _, row in self.data_frame.iterrows():
-            text = row[self.page_content_column]
             metadata = row.to_dict()
-            metadata.pop(self.page_content_column)
+            text = metadata.pop(self.page_content_column)
             yield Document(page_content=text, metadata=metadata)
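Editor's note: a small usage sketch of the loader touched above; `dict.pop()` removes the page-content column from the metadata and returns its value in one step. The sample data is illustrative.

```python
import pandas as pd

from langchain_community.document_loaders import DataFrameLoader

df = pd.DataFrame({"text": ["hello", "world"], "source": ["a.txt", "b.txt"]})
loader = DataFrameLoader(df, page_content_column="text")

for doc in loader.lazy_load():
    # e.g. page_content="hello", metadata={"source": "a.txt"}
    print(doc.page_content, doc.metadata)
```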
From 41b6a86bbe030291cf8ee284ed0cd70dd493152b Mon Sep 17 00:00:00 2001
From: Mohammad Mohtashim <45242107+keenborder786@users.noreply.github.com>
Date: Mon, 23 Dec 2024 19:50:22 +0500
Subject: [PATCH 3/7] Community: LlamaCppEmbeddings `embed_documents` and
 `embed_query` (#28827)

- **Description:** `embed_documents` and `embed_query` were raising the
  error stated in the issue: the `Llama` client returns embeddings in a
  nested list, which the previous implementation did not account for.
- **Issue:** #28813

---------

Co-authored-by: Chester Curme
---
 .../embeddings/llamacpp.py                  | 50 ++++++++++++-------
 .../unit_tests/embeddings/test_llamacpp.py  | 40 +++++++++++++++
 2 files changed, 72 insertions(+), 18 deletions(-)
 create mode 100644 libs/community/tests/unit_tests/embeddings/test_llamacpp.py

diff --git a/libs/community/langchain_community/embeddings/llamacpp.py b/libs/community/langchain_community/embeddings/llamacpp.py
index 6487312fd31d0..4adfeb0e52774 100644
--- a/libs/community/langchain_community/embeddings/llamacpp.py
+++ b/libs/community/langchain_community/embeddings/llamacpp.py
@@ -20,7 +20,7 @@ class LlamaCppEmbeddings(BaseModel, Embeddings):
     """
 
     client: Any = None  #: :meta private:
-    model_path: str
+    model_path: str = Field(default="")
 
     n_ctx: int = Field(512, alias="n_ctx")
     """Token context window."""
@@ -88,21 +88,22 @@ def validate_environment(self) -> Self:
         if self.n_gpu_layers is not None:
             model_params["n_gpu_layers"] = self.n_gpu_layers
 
-        try:
-            from llama_cpp import Llama
-
-            self.client = Llama(model_path, embedding=True, **model_params)
-        except ImportError:
-            raise ImportError(
-                "Could not import llama-cpp-python library. "
-                "Please install the llama-cpp-python library to "
-                "use this embedding model: pip install llama-cpp-python"
-            )
-        except Exception as e:
-            raise ValueError(
-                f"Could not load Llama model from path: {model_path}. "
-                f"Received error {e}"
-            )
+        if not self.client:
+            try:
+                from llama_cpp import Llama
+
+                self.client = Llama(model_path, embedding=True, **model_params)
+            except ImportError:
+                raise ImportError(
+                    "Could not import llama-cpp-python library. "
+                    "Please install the llama-cpp-python library to "
+                    "use this embedding model: pip install llama-cpp-python"
+                )
+            except Exception as e:
+                raise ValueError(
+                    f"Could not load Llama model from path: {model_path}. "
+                    f"Received error {e}"
+                )
 
         return self
 
@@ -116,7 +117,17 @@ def embed_documents(self, texts: List[str]) -> List[List[float]]:
             List of embeddings, one for each text.
         """
         embeddings = self.client.create_embedding(texts)
-        return [list(map(float, e["embedding"])) for e in embeddings["data"]]
+        final_embeddings = []
+        for e in embeddings["data"]:
+            try:
+                if isinstance(e["embedding"][0], list):
+                    for data in e["embedding"]:
+                        final_embeddings.append(list(map(float, data)))
+                else:
+                    final_embeddings.append(list(map(float, e["embedding"])))
+            except (IndexError, TypeError):
+                final_embeddings.append(list(map(float, e["embedding"])))
+        return final_embeddings
 
     def embed_query(self, text: str) -> List[float]:
         """Embed a query using the Llama model.
@@ -128,4 +139,7 @@
             Embeddings for the text.
         """
         embedding = self.client.embed(text)
-        return list(map(float, embedding))
+        if not isinstance(embedding, list):
+            return list(map(float, embedding))
+        else:
+            return list(map(float, embedding[0]))
diff --git a/libs/community/tests/unit_tests/embeddings/test_llamacpp.py b/libs/community/tests/unit_tests/embeddings/test_llamacpp.py
new file mode 100644
index 0000000000000..ca2bd758216cf
--- /dev/null
+++ b/libs/community/tests/unit_tests/embeddings/test_llamacpp.py
@@ -0,0 +1,40 @@
+from typing import Generator
+from unittest.mock import MagicMock, patch
+
+import pytest
+
+from langchain_community.embeddings.llamacpp import LlamaCppEmbeddings
+
+
+@pytest.fixture
+def mock_llama_client() -> Generator[MagicMock, None, None]:
+    with patch(
+        "langchain_community.embeddings.llamacpp.LlamaCppEmbeddings"
+    ) as MockLlama:
+        mock_client = MagicMock()
+        MockLlama.return_value = mock_client
+        yield mock_client
+
+
+def test_initialization(mock_llama_client: MagicMock) -> None:
+    embeddings = LlamaCppEmbeddings(client=mock_llama_client)  # type: ignore[call-arg]
+    assert embeddings.client is not None
+
+
+def test_embed_documents(mock_llama_client: MagicMock) -> None:
+    mock_llama_client.create_embedding.return_value = {
+        "data": [{"embedding": [[0.1, 0.2, 0.3]]}, {"embedding": [[0.4, 0.5, 0.6]]}]
+    }
+    embeddings = LlamaCppEmbeddings(client=mock_llama_client)  # type: ignore[call-arg]
+    texts = ["Hello world", "Test document"]
+    result = embeddings.embed_documents(texts)
+    expected = [[0.1, 0.2, 0.3], [0.4, 0.5, 0.6]]
+    assert result == expected
+
+
+def test_embed_query(mock_llama_client: MagicMock) -> None:
+    mock_llama_client.embed.return_value = [[0.1, 0.2, 0.3]]
+    embeddings = LlamaCppEmbeddings(client=mock_llama_client)  # type: ignore[call-arg]
+    result = embeddings.embed_query("Sample query")
+    expected = [0.1, 0.2, 0.3]
+    assert result == expected
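Editor's note: the shape handling added above can be sketched in isolation. Depending on the model, `llama-cpp-python` may return one flat vector per input or a nested list of vectors per entry; a minimal sketch of that normalization, assuming the same response layout as in the patch:

```python
from typing import Any, Dict, List

def normalize(data: List[Dict[str, Any]]) -> List[List[float]]:
    final: List[List[float]] = []
    for e in data:
        emb = e["embedding"]
        if emb and isinstance(emb[0], list):
            # Nested case: this entry holds several vectors; flatten them out.
            final.extend(list(map(float, row)) for row in emb)
        else:
            # Flat case: the entry already is a single embedding vector.
            final.append(list(map(float, emb)))
    return final

print(normalize([{"embedding": [[0.1, 0.2]]}, {"embedding": [0.3, 0.4]}]))
# -> [[0.1, 0.2], [0.3, 0.4]]
```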
""" embedding = self.client.embed(text) - return list(map(float, embedding)) + if not isinstance(embedding, list): + return list(map(float, embedding)) + else: + return list(map(float, embedding[0])) diff --git a/libs/community/tests/unit_tests/embeddings/test_llamacpp.py b/libs/community/tests/unit_tests/embeddings/test_llamacpp.py new file mode 100644 index 0000000000000..ca2bd758216cf --- /dev/null +++ b/libs/community/tests/unit_tests/embeddings/test_llamacpp.py @@ -0,0 +1,40 @@ +from typing import Generator +from unittest.mock import MagicMock, patch + +import pytest + +from langchain_community.embeddings.llamacpp import LlamaCppEmbeddings + + +@pytest.fixture +def mock_llama_client() -> Generator[MagicMock, None, None]: + with patch( + "langchain_community.embeddings.llamacpp.LlamaCppEmbeddings" + ) as MockLlama: + mock_client = MagicMock() + MockLlama.return_value = mock_client + yield mock_client + + +def test_initialization(mock_llama_client: MagicMock) -> None: + embeddings = LlamaCppEmbeddings(client=mock_llama_client) # type: ignore[call-arg] + assert embeddings.client is not None + + +def test_embed_documents(mock_llama_client: MagicMock) -> None: + mock_llama_client.create_embedding.return_value = { + "data": [{"embedding": [[0.1, 0.2, 0.3]]}, {"embedding": [[0.4, 0.5, 0.6]]}] + } + embeddings = LlamaCppEmbeddings(client=mock_llama_client) # type: ignore[call-arg] + texts = ["Hello world", "Test document"] + result = embeddings.embed_documents(texts) + expected = [[0.1, 0.2, 0.3], [0.4, 0.5, 0.6]] + assert result == expected + + +def test_embed_query(mock_llama_client: MagicMock) -> None: + mock_llama_client.embed.return_value = [[0.1, 0.2, 0.3]] + embeddings = LlamaCppEmbeddings(client=mock_llama_client) # type: ignore[call-arg] + result = embeddings.embed_query("Sample query") + expected = [0.1, 0.2, 0.3] + assert result == expected From 4b4d09f82b5451df34568096c2d8906c8f5a0a6d Mon Sep 17 00:00:00 2001 From: ZhangShenao <15201440436@163.com> Date: Mon, 23 Dec 2024 22:51:44 +0800 Subject: [PATCH 4/7] [Doc] Improvement: Fix docs of `ChatMLX` (#28884) - `ChatMLX` doesn't supports the role of system. - Fix https://github.com/langchain-ai/langchain/issues/28532 #28532 --- docs/docs/integrations/chat/mlx.ipynb | 46 +++++++++++++++++++++++++-- 1 file changed, 43 insertions(+), 3 deletions(-) diff --git a/docs/docs/integrations/chat/mlx.ipynb b/docs/docs/integrations/chat/mlx.ipynb index a5945dffae408..dc852da549d55 100644 --- a/docs/docs/integrations/chat/mlx.ipynb +++ b/docs/docs/integrations/chat/mlx.ipynb @@ -155,8 +155,48 @@ "tools = load_tools([\"serpapi\", \"llm-math\"], llm=llm)\n", "\n", "# setup ReAct style prompt\n", - "prompt = hub.pull(\"hwchase17/react-json\")\n", - "prompt = prompt.partial(\n", + "# Based on 'hwchase17/react' prompt modification, cause mlx does not support the `System` role\n", + "human_prompt = \"\"\"\n", + "Answer the following questions as best you can. You have access to the following tools:\n", + "\n", + "{tools}\n", + "\n", + "The way you use the tools is by specifying a json blob.\n", + "Specifically, this json should have a `action` key (with the name of the tool to use) and a `action_input` key (with the input to the tool going here).\n", + "\n", + "The only values that should be in the \"action\" field are: {tool_names}\n", + "\n", + "The $JSON_BLOB should only contain a SINGLE action, do NOT return a list of multiple actions. 
From 8d9907088b843756b5aa3f49f11f51b451567fa1 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Adri=C3=A1n=20Panella?=
Date: Mon, 23 Dec 2024 10:05:48 -0500
Subject: [PATCH 5/7] community(azuresearch): allow to use any valid
 credential (#28873)

Add an option to use any valid credential type. Differentiates the async
cases needed by Azure Search. This can replace the use of a static token.
---
 .../vectorstores/azuresearch.py | 21 ++++++++++++++++---
 1 file changed, 18 insertions(+), 3 deletions(-)

diff --git a/libs/community/langchain_community/vectorstores/azuresearch.py b/libs/community/langchain_community/vectorstores/azuresearch.py
index 6930c8319e4d9..d0aa15e2acbd1 100644
--- a/libs/community/langchain_community/vectorstores/azuresearch.py
+++ b/libs/community/langchain_community/vectorstores/azuresearch.py
@@ -42,6 +42,8 @@
 logger = logging.getLogger()
 
 if TYPE_CHECKING:
+    from azure.core.credentials import TokenCredential
+    from azure.core.credentials_async import AsyncTokenCredential
     from azure.search.documents import SearchClient, SearchItemPaged
     from azure.search.documents.aio import (
         AsyncSearchItemPaged,
@@ -96,10 +98,13 @@ def _get_search_client(
     cors_options: Optional[CorsOptions] = None,
     async_: bool = False,
     additional_search_client_options: Optional[Dict[str, Any]] = None,
+    azure_credential: Optional[TokenCredential] = None,
+    azure_async_credential: Optional[AsyncTokenCredential] = None,
 ) -> Union[SearchClient, AsyncSearchClient]:
     from azure.core.credentials import AccessToken, AzureKeyCredential, TokenCredential
     from azure.core.exceptions import ResourceNotFoundError
     from azure.identity import DefaultAzureCredential, InteractiveBrowserCredential
+    from azure.identity.aio import DefaultAzureCredential as AsyncDefaultAzureCredential
     from azure.search.documents import SearchClient
     from azure.search.documents.aio import SearchClient as AsyncSearchClient
     from azure.search.documents.indexes import SearchIndexClient
@@ -143,12 +148,17 @@ def get_token(
         if key.upper() == "INTERACTIVE":
             credential = InteractiveBrowserCredential()
             credential.get_token("https://search.azure.com/.default")
+            async_credential = credential
         else:
             credential = AzureKeyCredential(key)
+            async_credential = credential
     elif azure_ad_access_token is not None:
         credential = AzureBearerTokenCredential(azure_ad_access_token)
+        async_credential = credential
     else:
-        credential = DefaultAzureCredential()
+        credential = azure_credential or DefaultAzureCredential()
+        async_credential = azure_async_credential or AsyncDefaultAzureCredential()
+
     index_client: SearchIndexClient = SearchIndexClient(
         endpoint=endpoint,
         credential=credential,
@@ -266,7 +276,7 @@ def fmt_err(x: str) -> str:
         return AsyncSearchClient(
             endpoint=endpoint,
             index_name=index_name,
-            credential=credential,
+            credential=async_credential,
             user_agent=user_agent,
             **additional_search_client_options,
         )
@@ -278,7 +288,7 @@ class AzureSearch(VectorStore):
     def __init__(
         self,
         azure_search_endpoint: str,
-        azure_search_key: str,
+        azure_search_key: Optional[str],
         index_name: str,
         embedding_function: Union[Callable, Embeddings],
         search_type: str = "hybrid",
@@ -295,6 +305,8 @@ def __init__(
         vector_search_dimensions: Optional[int] = None,
         additional_search_client_options: Optional[Dict[str, Any]] = None,
         azure_ad_access_token: Optional[str] = None,
+        azure_credential: Optional[TokenCredential] = None,
+        azure_async_credential: Optional[AsyncTokenCredential] = None,
         **kwargs: Any,
     ):
         try:
@@ -361,6 +373,7 @@ def __init__(
             user_agent=user_agent,
             cors_options=cors_options,
             additional_search_client_options=additional_search_client_options,
+            azure_credential=azure_credential,
         )
         self.async_client = _get_search_client(
             azure_search_endpoint,
@@ -377,6 +390,8 @@ def __init__(
             user_agent=user_agent,
             cors_options=cors_options,
             async_=True,
+            azure_credential=azure_credential,
+            azure_async_credential=azure_async_credential,
         )
         self.search_type = search_type
         self.semantic_configuration_name = semantic_configuration_name
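Editor's note: a hedged sketch of how the new keyword arguments might be wired up, based only on the signature added in this patch; the endpoint, index name, and embedding choice are placeholders, and a reachable Azure AI Search service plus `azure-identity` are assumed.

```python
from azure.identity import DefaultAzureCredential
from azure.identity.aio import DefaultAzureCredential as AsyncDefaultAzureCredential
from langchain_community.vectorstores.azuresearch import AzureSearch
from langchain_openai import OpenAIEmbeddings

embeddings = OpenAIEmbeddings()

vector_store = AzureSearch(
    azure_search_endpoint="https://<service>.search.windows.net",  # placeholder
    azure_search_key=None,  # no API key: fall back to Azure AD credentials
    index_name="langchain-index",
    embedding_function=embeddings.embed_query,
    azure_credential=DefaultAzureCredential(),
    azure_async_credential=AsyncDefaultAzureCredential(),
)
```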
From e5c9da3eb622a3fd32fe073fa5246d8293895215 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Wang=20Ran=20=28=E6=B1=AA=E7=84=B6=29?=
Date: Mon, 23 Dec 2024 23:31:23 +0800
Subject: [PATCH 6/7] core[patch]: remove redundant imports (#28861)

`Graph` is already imported at line 62.
---
 libs/core/langchain_core/runnables/base.py | 2 --
 libs/core/langchain_core/utils/pydantic.py | 2 +-
 2 files changed, 1 insertion(+), 3 deletions(-)

diff --git a/libs/core/langchain_core/runnables/base.py b/libs/core/langchain_core/runnables/base.py
index 3e43fb10a8e6b..893f393d8b174 100644
--- a/libs/core/langchain_core/runnables/base.py
+++ b/libs/core/langchain_core/runnables/base.py
@@ -534,8 +534,6 @@ def get_config_jsonschema(
 
     def get_graph(self, config: Optional[RunnableConfig] = None) -> Graph:
         """Return a graph representation of this Runnable."""
-        from langchain_core.runnables.graph import Graph
-
         graph = Graph()
         try:
             input_node = graph.add_node(self.get_input_schema(config))
diff --git a/libs/core/langchain_core/utils/pydantic.py b/libs/core/langchain_core/utils/pydantic.py
index ae403574bb761..65f12232f9fc6 100644
--- a/libs/core/langchain_core/utils/pydantic.py
+++ b/libs/core/langchain_core/utils/pydantic.py
@@ -279,7 +279,7 @@ def _create_subset_model_v2(
     fn_description: Optional[str] = None,
 ) -> type[pydantic.BaseModel]:
     """Create a pydantic model with a subset of the model fields."""
-    from pydantic import ConfigDict, create_model
+    from pydantic import create_model
     from pydantic.fields import FieldInfo
 
     descriptions_ = descriptions or {}
From 6352edf77fe2ef5f412201562426fbbdf5c6bfeb Mon Sep 17 00:00:00 2001
From: Andreas Motl
Date: Mon, 23 Dec 2024 16:55:44 +0100
Subject: [PATCH 7/7] docs: CrateDB: Register package `langchain-cratedb`, and
 add minimal "provider" documentation (#28877)
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Hi Erick. Coming back from a previous attempt, we have now made a separate
package for the CrateDB adapter, called `langchain-cratedb`, as advised.

Other than registering the package within `libs/packages.yml`, this patch
includes a minimal amount of documentation to accompany the advent of this
new package. Let us know about any mistakes we made, or changes you would
like to see. Thanks, Andreas.

## About

- **Description:** Register a new database adapter package,
  `langchain-cratedb`, providing traditional vector store, document loader,
  and chat message history features for a start.
- **Addressed to:** @efriis, @eyurtsev
- **References:** GH-27710
- **Preview:** [Providers » More » CrateDB](https://langchain-git-fork-crate-workbench-register-la-4bf945-langchain.vercel.app/docs/integrations/providers/cratedb/)

## Status

- **PyPI:** https://pypi.org/project/langchain-cratedb/
- **GitHub:** https://github.com/crate/langchain-cratedb
- **Documentation (CrateDB):** https://cratedb.com/docs/guide/integrate/langchain/
- **Documentation (LangChain):** _This PR._

## Backlog?

Is this applicable for this kind of patch?

> - [ ] **Add tests and docs**: If you're adding a new integration, please include
>   1. a test for the integration, preferably unit tests that do not rely
>      on network access,
>   2. an example notebook showing its use. It lives in the
>      `docs/docs/integrations` directory.

## Q&A

1. Notebooks that use the LangChain CrateDB adapter are currently at
   [CrateDB LangChain Examples](https://github.com/crate/cratedb-examples/tree/main/topic/machine-learning/llm-langchain),
   and the documentation refers to them. Because they are derived from very
   old blueprints coming from LangChain 0.0.x times, we guess they need a
   refresh before adding them to `docs/docs/integrations`. Is it applicable
   to merge this minimal package registration + documentation patch, which
   already includes valid code snippets in `cratedb.mdx`, and add
   corresponding notebooks on behalf of a subsequent patch later?
2. How would it work getting into the tabular list of _Integration Packages_
   enumerated on the [documentation entrypoint page about Providers](https://python.langchain.com/docs/integrations/providers/)?

/cc Please also review, @ckurze, @wierdvanderhaar, @kneth, @simonprickett,
if you can find the time. Thanks!
---
 docs/docs/integrations/providers/cratedb.mdx | 132 +++++++++++++++++++
 libs/packages.yml                            |   3 +
 2 files changed, 135 insertions(+)
 create mode 100644 docs/docs/integrations/providers/cratedb.mdx

diff --git a/docs/docs/integrations/providers/cratedb.mdx b/docs/docs/integrations/providers/cratedb.mdx
new file mode 100644
index 0000000000000..24e47930407c0
--- /dev/null
+++ b/docs/docs/integrations/providers/cratedb.mdx
@@ -0,0 +1,132 @@
+# CrateDB
+
+> [CrateDB] is a distributed and scalable SQL database for storing and
+> analyzing massive amounts of data in near real-time, even with complex
+> queries. It is PostgreSQL-compatible, based on Lucene, and inherits
+> from Elasticsearch.
+
+
+## Installation and Setup
+
+### Setup CrateDB
+There are two ways to get started with CrateDB quickly. Alternatively,
+choose other [CrateDB installation options].
+
+#### Start CrateDB on your local machine
+Example: Run a single-node CrateDB instance with security disabled,
+using Docker or Podman. This is not recommended for production use.
+
+```bash
+docker run --name=cratedb --rm \
+  --publish=4200:4200 --publish=5432:5432 --env=CRATE_HEAP_SIZE=2g \
+  crate:latest -Cdiscovery.type=single-node
+```
+
+#### Deploy cluster on CrateDB Cloud
+[CrateDB Cloud] is a managed CrateDB service. Sign up for a
+[free trial][CrateDB Cloud Console].
+
+### Install Client
+Install the most recent version of the `langchain-cratedb` package
+and a few others that are needed for this tutorial.
+```bash
+pip install --upgrade langchain-cratedb langchain-openai unstructured
+```
+
+
+## Documentation
+For a more detailed walkthrough of the CrateDB wrapper, see
+[using LangChain with CrateDB]. See also [all features of CrateDB]
+to learn about other functionality provided by CrateDB.
+
+
+## Features
+The CrateDB adapter for LangChain provides APIs to use CrateDB as a vector
+store, document loader, and storage for chat messages.
+
+### Vector Store
+Use the CrateDB vector store functionality around `FLOAT_VECTOR` and `KNN_MATCH`
+for similarity search and other purposes. See also [CrateDBVectorStore Tutorial].
+
+Make sure you've configured a valid OpenAI API key.
+```bash
+export OPENAI_API_KEY=sk-XJZ...
+```
+```python
+from langchain_community.document_loaders import UnstructuredURLLoader
+from langchain_cratedb import CrateDBVectorStore
+from langchain_openai import OpenAIEmbeddings
+from langchain.text_splitter import CharacterTextSplitter
+
+loader = UnstructuredURLLoader(urls=["https://github.com/langchain-ai/langchain/raw/refs/tags/langchain-core==0.3.28/docs/docs/how_to/state_of_the_union.txt"])
+documents = loader.load()
+text_splitter = CharacterTextSplitter(chunk_size=1000, chunk_overlap=0)
+docs = text_splitter.split_documents(documents)
+
+embeddings = OpenAIEmbeddings()
+
+# Connect to a self-managed CrateDB instance on localhost.
+CONNECTION_STRING = "crate://?schema=testdrive"
+
+store = CrateDBVectorStore.from_documents(
+    documents=docs,
+    embedding=embeddings,
+    collection_name="state_of_the_union",
+    connection=CONNECTION_STRING,
+)
+
+query = "What did the president say about Ketanji Brown Jackson"
+docs_with_score = store.similarity_search_with_score(query)
+```
+
+### Document Loader
+Load documents from a CrateDB database table, using the document loader
+`CrateDBLoader`, which is based on SQLAlchemy. See also [CrateDBLoader Tutorial].
+
+To use the document loader in your applications:
+```python
+import sqlalchemy as sa
+from langchain_community.utilities import SQLDatabase
+from langchain_cratedb import CrateDBLoader
+
+# Connect to a self-managed CrateDB instance on localhost.
+CONNECTION_STRING = "crate://?schema=testdrive"
+
+db = SQLDatabase(engine=sa.create_engine(CONNECTION_STRING))
+
+loader = CrateDBLoader(
+    'SELECT * FROM sys.summits LIMIT 42',
+    db=db,
+)
+documents = loader.load()
+```
+
+### Chat Message History
+Use CrateDB as the storage for your chat messages.
+See also [CrateDBChatMessageHistory Tutorial].
+
+To use the chat message history in your applications:
+```python
+from langchain_cratedb import CrateDBChatMessageHistory
+
+# Connect to a self-managed CrateDB instance on localhost.
+CONNECTION_STRING = "crate://?schema=testdrive" + +message_history = CrateDBChatMessageHistory( + session_id="test-session", + connection=CONNECTION_STRING, +) + +message_history.add_user_message("hi!") +``` + + +[all features of CrateDB]: https://cratedb.com/docs/guide/feature/ +[CrateDB]: https://cratedb.com/database +[CrateDB Cloud]: https://cratedb.com/database/cloud +[CrateDB Cloud Console]: https://console.cratedb.cloud/?utm_source=langchain&utm_content=documentation +[CrateDB installation options]: https://cratedb.com/docs/guide/install/ +[CrateDBChatMessageHistory Tutorial]: https://github.com/crate/cratedb-examples/blob/main/topic/machine-learning/llm-langchain/conversational_memory.ipynb +[CrateDBLoader Tutorial]: https://github.com/crate/cratedb-examples/blob/main/topic/machine-learning/llm-langchain/document_loader.ipynb +[CrateDBVectorStore Tutorial]: https://github.com/crate/cratedb-examples/blob/main/topic/machine-learning/llm-langchain/vector_search.ipynb +[using LangChain with CrateDB]: https://cratedb.com/docs/guide/integrate/langchain/ diff --git a/libs/packages.yml b/libs/packages.yml index da26ed6f0cfb8..e9f64be5a5eaa 100644 --- a/libs/packages.yml +++ b/libs/packages.yml @@ -143,6 +143,9 @@ packages: - name: langchain-couchbase repo: langchain-ai/langchain path: libs/partners/couchbase + - name: langchain-cratedb + repo: crate/langchain-cratedb + path: . - name: langchain-ollama repo: langchain-ai/langchain path: libs/partners/ollama