From c6440b67c79647324e220edac849919a95583e26 Mon Sep 17 00:00:00 2001
From: aditishree1 <141712869+aditishree1@users.noreply.github.com>
Date: Tue, 5 Nov 2024 23:20:01 +0530
Subject: [PATCH] feat(cosmosdbnosql): Add Semantic Cache Integration (#7033)

Co-authored-by: Yohan Lasorsa <noda@free.fr>
Co-authored-by: jacoblee93 <jacoblee93@gmail.com>
---
 .../llm_caching/azure_cosmosdb_nosql.mdx      |  40 +++
 .../docs/integrations/llm_caching/index.mdx   |  14 +
 .../docs/integrations/platforms/microsoft.mdx |  18 ++
 docs/core_docs/sidebars.js                    |  16 ++
 .../azure_cosmosdb_nosql.ts                   |  49 ++++
 .../src/azure_cosmosdb_nosql.ts               |   6 +-
 libs/langchain-azure-cosmosdb/src/caches.ts   | 191 ++++++++++++++
 libs/langchain-azure-cosmosdb/src/index.ts    |   1 +
 .../src/tests/caches.int.test.ts              | 244 ++++++++++++++++++
 .../src/tests/caches.test.ts                  |  67 +++++
 10 files changed, 643 insertions(+), 3 deletions(-)
 create mode 100644 docs/core_docs/docs/integrations/llm_caching/azure_cosmosdb_nosql.mdx
 create mode 100644 docs/core_docs/docs/integrations/llm_caching/index.mdx
 create mode 100644 examples/src/caches/azure_cosmosdb_nosql/azure_cosmosdb_nosql.ts
 create mode 100644 libs/langchain-azure-cosmosdb/src/caches.ts
 create mode 100644 libs/langchain-azure-cosmosdb/src/tests/caches.int.test.ts
 create mode 100644 libs/langchain-azure-cosmosdb/src/tests/caches.test.ts
diff --git a/docs/core_docs/docs/integrations/llm_caching/azure_cosmosdb_nosql.mdx b/docs/core_docs/docs/integrations/llm_caching/azure_cosmosdb_nosql.mdx
new file mode 100644
index 000000000000..ecf82513a7ae
--- /dev/null
+++ b/docs/core_docs/docs/integrations/llm_caching/azure_cosmosdb_nosql.mdx
@@ -0,0 +1,40 @@
+# Azure Cosmos DB NoSQL Semantic Cache
+
+> The Semantic Cache feature is supported with Azure Cosmos DB for NoSQL integration, enabling users to retrieve cached responses based on semantic similarity between the user input and previously cached results. It leverages [AzureCosmosDBNoSQLVectorStore](/docs/integrations/vectorstores/azure_cosmosdb_nosql), which stores vector embeddings of cached prompts. These embeddings enable similarity-based searches, allowing the system to retrieve relevant cached results.
+
+If you don't have an Azure account, you can [create a free account](https://azure.microsoft.com/free/) to get started.
+
+## Setup
+
+You'll first need to install the [`@langchain/azure-cosmosdb`](https://www.npmjs.com/package/@langchain/azure-cosmosdb) package:
+
+import IntegrationInstallTooltip from "@mdx_components/integration_install_tooltip.mdx";
+
+<IntegrationInstallTooltip></IntegrationInstallTooltip>
+
+```bash npm2yarn
+npm install @langchain/azure-cosmosdb @langchain/core
+```
+
+You'll also need to have an Azure Cosmos DB for NoSQL instance running. You can deploy a free version on Azure Portal without any cost, following [this guide](https://learn.microsoft.com/azure/cosmos-db/nosql/quickstart-portal).
+
+Once you have your instance running, make sure you have the connection string. If you are using Managed Identity, you need to have the endpoint. You can find them in the Azure Portal, under the "Settings / Keys" section of your instance.
+
+import CodeBlock from "@theme/CodeBlock";
+
+:::info
+
+When using Azure Managed Identity and role-based access control, you must ensure that the database and container have been created beforehand. RBAC does not provide permissions to create databases and containers. You can get more information about the permission model in the [Azure Cosmos DB documentation](https://learn.microsoft.com/azure/cosmos-db/how-to-setup-rbac#permission-model).
+
+:::
+
+## Usage example
+
+import Example from "@examples/caches/azure_cosmosdb_nosql/azure_cosmosdb_nosql.ts";
+
+<CodeBlock language="typescript">{Example}</CodeBlock>
+
+## Related
+
+- Vector store [conceptual guide](/docs/concepts/#vectorstores)
+- Vector store [how-to guides](/docs/how_to/#vectorstores)
diff --git a/docs/core_docs/docs/integrations/llm_caching/index.mdx b/docs/core_docs/docs/integrations/llm_caching/index.mdx
new file mode 100644
index 000000000000..f1f5f6702ad8
--- /dev/null
+++ b/docs/core_docs/docs/integrations/llm_caching/index.mdx
@@ -0,0 +1,14 @@
+---
+sidebar_class_name: hidden
+hide_table_of_contents: true
+---
+
+# Model caches
+
+[Caching LLM calls](/docs/how_to/chat_model_caching) can be useful for testing, cost savings, and speed.
+
+Below are some integrations that allow you to cache results of individual LLM calls using different caches with different strategies.
+
+import { IndexTable } from "@theme/FeatureTables";
+
+<IndexTable />
diff --git a/docs/core_docs/docs/integrations/platforms/microsoft.mdx b/docs/core_docs/docs/integrations/platforms/microsoft.mdx
index b048323e04a4..e9f3d7fd4922 100644
--- a/docs/core_docs/docs/integrations/platforms/microsoft.mdx
+++ b/docs/core_docs/docs/integrations/platforms/microsoft.mdx
@@ -132,6 +132,24 @@ See a [usage example](/docs/integrations/vectorstores/azure_cosmosdb_mongodb).
 import { AzureCosmosDBMongoDBVectorStore } from "@langchain/azure-cosmosdb";
 ```
 
+## Semantic Cache
+
+### Azure Cosmos DB NoSQL Semantic Cache
+
+> The Semantic Cache feature is supported with Azure Cosmos DB for NoSQL integration, enabling users to retrieve cached responses based on semantic similarity between the user input and previously cached results. It leverages [AzureCosmosDBNoSQLVectorStore](/docs/integrations/vectorstores/azure_cosmosdb_nosql), which stores vector embeddings of cached prompts. These embeddings enable similarity-based searches, allowing the system to retrieve relevant cached results.
+
+<IntegrationInstallTooltip></IntegrationInstallTooltip>
+
+```bash npm2yarn
+npm install @langchain/azure-cosmosdb @langchain/core
+```
+
+See a [usage example](/docs/integrations/llm_caching/azure_cosmosdb_nosql).
+
+```typescript
+import { AzureCosmosDBNoSQLSemanticCache } from "@langchain/azure-cosmosdb";
+```
+
 ## Document loaders
 
 ### Azure Blob Storage
diff --git a/docs/core_docs/sidebars.js b/docs/core_docs/sidebars.js
index 851912174a54..95bf57ec5859 100644
--- a/docs/core_docs/sidebars.js
+++ b/docs/core_docs/sidebars.js
@@ -347,6 +347,22 @@ module.exports = {
                 slug: "integrations/document_transformers",
               },
             },
+            {
+              type: "category",
+              label: "Model caches",
+              collapsible: false,
+              items: [
+                {
+                  type: "autogenerated",
+                  dirName: "integrations/llm_caching",
+                  className: "hidden",
+                },
+              ],
+              link: {
+                type: "doc",
+                id: "integrations/llm_caching/index",
+              },
+            },
             {
               type: "category",
               label: "Graphs",
diff --git a/examples/src/caches/azure_cosmosdb_nosql/azure_cosmosdb_nosql.ts b/examples/src/caches/azure_cosmosdb_nosql/azure_cosmosdb_nosql.ts
new file mode 100644
index 000000000000..3797b11b1144
--- /dev/null
+++ b/examples/src/caches/azure_cosmosdb_nosql/azure_cosmosdb_nosql.ts
@@ -0,0 +1,49 @@
+import {
+  AzureCosmosDBNoSQLConfig,
+  AzureCosmosDBNoSQLSemanticCache,
+} from "@langchain/azure-cosmosdb";
+import { ChatOpenAI, OpenAIEmbeddings } from "@langchain/openai";
+
+const embeddings = new OpenAIEmbeddings();
+const config: AzureCosmosDBNoSQLConfig = {
+  databaseName: "<DATABASE_NAME>",
+  containerName: "<CONTAINER_NAME>",
+  // to authenticate with managed identity, set `endpoint` instead of `connectionString`
+  connectionString: "<CONNECTION_STRING>",
+};
+
+/**
+ * Sets the threshold similarity score for returning cached results based on vector distance.
+ * Cached output is returned only if the similarity score meets or exceeds this threshold;
+ * otherwise, a new result is generated. Default is 0.6, adjustable via the constructor
+ * to suit various distance functions and use cases.
+ * (see: https://learn.microsoft.com/azure/cosmos-db/nosql/query/vectordistance).
+ */
+
+const similarityScoreThreshold = 0.5;
+const cache = new AzureCosmosDBNoSQLSemanticCache(
+  embeddings,
+  config,
+  similarityScoreThreshold
+);
+
+const model = new ChatOpenAI({ cache });
+
+// Invoke the model to perform an action
+const response1 = await model.invoke("Do something random!");
+console.log(response1);
+/*
+  AIMessage {
+    content: "Sure! I'll generate a random number for you: 37",
+    additional_kwargs: {}
+  }
+*/
+
+const response2 = await model.invoke("Do something random!");
+console.log(response2);
+/*
+  AIMessage {
+    content: "Sure! I'll generate a random number for you: 37",
+    additional_kwargs: {}
+  }
+*/
diff --git a/libs/langchain-azure-cosmosdb/src/azure_cosmosdb_nosql.ts b/libs/langchain-azure-cosmosdb/src/azure_cosmosdb_nosql.ts
index fdd287047278..618d43ab64c9 100644
--- a/libs/langchain-azure-cosmosdb/src/azure_cosmosdb_nosql.ts
+++ b/libs/langchain-azure-cosmosdb/src/azure_cosmosdb_nosql.ts
@@ -78,7 +78,7 @@ export interface AzureCosmosDBNoSQLConfig
   readonly metadataKey?: string;
 }
 
-const USER_AGENT_PREFIX = "langchainjs-azure-cosmosdb-nosql";
+const USER_AGENT_SUFFIX = "langchainjs-cdbnosql-vectorstore-javascript";
 
 /**
  * Azure Cosmos DB for NoSQL vCore vector store.
@@ -151,14 +151,14 @@ export class AzureCosmosDBNoSQLVectorStore extends VectorStore {
         this.client = new CosmosClient({
           endpoint,
           key,
-          userAgentSuffix: USER_AGENT_PREFIX,
+          userAgentSuffix: USER_AGENT_SUFFIX,
         });
       } else {
         // Use managed identity
         this.client = new CosmosClient({
           endpoint,
           aadCredentials: dbConfig.credentials ?? new DefaultAzureCredential(),
-          userAgentSuffix: USER_AGENT_PREFIX,
+          userAgentSuffix: USER_AGENT_SUFFIX,
         } as CosmosClientOptions);
       }
     }
diff --git a/libs/langchain-azure-cosmosdb/src/caches.ts b/libs/langchain-azure-cosmosdb/src/caches.ts
new file mode 100644
index 000000000000..da7619c5ff96
--- /dev/null
+++ b/libs/langchain-azure-cosmosdb/src/caches.ts
@@ -0,0 +1,191 @@
+import {
+  BaseCache,
+  deserializeStoredGeneration,
+  getCacheKey,
+  serializeGeneration,
+} from "@langchain/core/caches";
+import { Generation } from "@langchain/core/outputs";
+import { Document } from "@langchain/core/documents";
+import { EmbeddingsInterface } from "@langchain/core/embeddings";
+import { CosmosClient, CosmosClientOptions } from "@azure/cosmos";
+import { DefaultAzureCredential } from "@azure/identity";
+import { getEnvironmentVariable } from "@langchain/core/utils/env";
+import {
+  AzureCosmosDBNoSQLConfig,
+  AzureCosmosDBNoSQLVectorStore,
+} from "./azure_cosmosdb_nosql.js";
+
+const USER_AGENT_SUFFIX = "langchainjs-cdbnosql-semanticcache-javascript";
+const DEFAULT_CONTAINER_NAME = "semanticCacheContainer";
+
+/**
+ * Represents a Semantic Cache that uses CosmosDB NoSQL backend as the underlying
+ * storage system.
+ *
+ * @example
+ * ```typescript
+ * const embeddings = new OpenAIEmbeddings();
+ * const cache = new AzureCosmosDBNoSQLSemanticCache(embeddings, {
+ *   databaseName: DATABASE_NAME,
+ *   containerName: CONTAINER_NAME
+ * });
+ * const model = new ChatOpenAI({cache});
+ *
+ * // Invoke the model to perform an action
+ * const response = await model.invoke("Do something random!");
+ * console.log(response);
+ * ```
+ */
+export class AzureCosmosDBNoSQLSemanticCache extends BaseCache {
+  private embeddings: EmbeddingsInterface;
+
+  private config: AzureCosmosDBNoSQLConfig;
+
+  private similarityScoreThreshold: number;
+
+  private cacheDict: { [key: string]: AzureCosmosDBNoSQLVectorStore } = {};
+
+  private vectorDistanceFunction: string;
+
+  constructor(
+    embeddings: EmbeddingsInterface,
+    dbConfig: AzureCosmosDBNoSQLConfig,
+    similarityScoreThreshold: number = 0.6
+  ) {
+    super();
+    let client: CosmosClient;
+
+    const connectionString =
+      dbConfig.connectionString ??
+      getEnvironmentVariable("AZURE_COSMOSDB_NOSQL_CONNECTION_STRING");
+
+    const endpoint =
+      dbConfig.endpoint ??
+      getEnvironmentVariable("AZURE_COSMOSDB_NOSQL_ENDPOINT");
+
+    if (!dbConfig.client && !connectionString && !endpoint) {
+      throw new Error(
+        "AzureCosmosDBNoSQLSemanticCache client, connection string or endpoint must be set."
+      );
+    }
+
+    if (!dbConfig.client) {
+      if (connectionString) {
+        // Cut at the first "=" only: base64 account keys end in "=" padding.
+        const [endpointKv, keyKv] = connectionString.split(";");
+        const accountEndpoint = endpointKv.slice(endpointKv.indexOf("=") + 1);
+        const accountKey = keyKv.slice(keyKv.indexOf("=") + 1);
+
+        client = new CosmosClient({
+          endpoint: accountEndpoint,
+          key: accountKey,
+          userAgentSuffix: USER_AGENT_SUFFIX,
+        });
+      } else {
+        // Use managed identity
+        client = new CosmosClient({
+          endpoint,
+          aadCredentials: dbConfig.credentials ?? new DefaultAzureCredential(),
+          userAgentSuffix: USER_AGENT_SUFFIX,
+        } as CosmosClientOptions);
+      }
+    } else {
+      client = dbConfig.client;
+    }
+
+    this.vectorDistanceFunction =
+      dbConfig.vectorEmbeddingPolicy?.vectorEmbeddings[0]?.distanceFunction ??
+      "cosine";
+
+    this.config = {
+      ...dbConfig,
+      client,
+      databaseName: dbConfig.databaseName,
+      containerName: dbConfig.containerName ?? DEFAULT_CONTAINER_NAME,
+    };
+    this.embeddings = embeddings;
+    this.similarityScoreThreshold = similarityScoreThreshold;
+  }
+
+  private getLlmCache(llmKey: string) {
+    const key = getCacheKey(llmKey);
+    if (!this.cacheDict[key]) {
+      this.cacheDict[key] = new AzureCosmosDBNoSQLVectorStore(
+        this.embeddings,
+        this.config
+      );
+    }
+    return this.cacheDict[key];
+  }
+
+  /**
+   * Retrieves the generations cached for the stored prompt closest to the input.
+   *
+   * @param prompt The prompt for lookup.
+   * @param llmKey The LLM key used to construct the cache key.
+   * @returns An array of Generations if a similar enough entry is found, null otherwise.
+   */
+  public async lookup(prompt: string, llmKey: string) {
+    const llmCache = this.getLlmCache(llmKey);
+
+    const results = await llmCache.similaritySearchWithScore(prompt, 1);
+    if (!results.length) return null;
+
+    const generations = results
+      .flatMap(([document, score]) => {
+        const isSimilar =
+          (this.vectorDistanceFunction === "euclidean" &&
+            score <= this.similarityScoreThreshold) ||
+          (this.vectorDistanceFunction !== "euclidean" &&
+            score >= this.similarityScoreThreshold);
+
+        if (!isSimilar) return undefined;
+
+        return document.metadata.return_value.map((gen: string) =>
+          deserializeStoredGeneration(JSON.parse(gen))
+        );
+      })
+      .filter((gen) => gen !== undefined);
+
+    return generations.length > 0 ? generations : null;
+  }
+
+  /**
+   * Updates the cache with new data.
+   *
+   * @param prompt The prompt for update.
+   * @param llmKey The LLM key used to construct the cache key.
+   * @param returnValue The generations to be stored in the cache.
+   */
+  public async update(
+    prompt: string,
+    llmKey: string,
+    returnValue: Generation[]
+  ) {
+    const serializedGenerations = returnValue.map((generation) =>
+      JSON.stringify(serializeGeneration(generation))
+    );
+    const llmCache = this.getLlmCache(llmKey);
+    const metadata = {
+      llm_string: llmKey,
+      prompt,
+      return_value: serializedGenerations,
+    };
+    const doc = new Document({
+      pageContent: prompt,
+      metadata,
+    });
+    await llmCache.addDocuments([doc]);
+  }
+
+  /**
+   * Deletes the semantic cache for a given llmKey; no-op if none was created yet.
+   * @param llmKey The LLM key used to construct the cache key.
+   */
+  public async clear(llmKey: string) {
+    const key = getCacheKey(llmKey);
+    if (this.cacheDict[key]) {
+      await this.cacheDict[key].delete();
+    }
+  }
+}
diff --git a/libs/langchain-azure-cosmosdb/src/index.ts b/libs/langchain-azure-cosmosdb/src/index.ts
index 04a6453c00c3..e1160c548ef9 100644
--- a/libs/langchain-azure-cosmosdb/src/index.ts
+++ b/libs/langchain-azure-cosmosdb/src/index.ts
@@ -1,2 +1,3 @@
 export * from "./azure_cosmosdb_mongodb.js";
 export * from "./azure_cosmosdb_nosql.js";
+export * from "./caches.js";
diff --git a/libs/langchain-azure-cosmosdb/src/tests/caches.int.test.ts b/libs/langchain-azure-cosmosdb/src/tests/caches.int.test.ts
new file mode 100644
index 000000000000..d6b66ddaac05
--- /dev/null
+++ b/libs/langchain-azure-cosmosdb/src/tests/caches.int.test.ts
@@ -0,0 +1,244 @@
+/* eslint-disable no-process-env */
+/* eslint-disable @typescript-eslint/no-explicit-any */
+
+import {
+  CosmosClient,
+  IndexingMode,
+  VectorEmbeddingPolicy,
+} from "@azure/cosmos";
+import { DefaultAzureCredential } from "@azure/identity";
+import { ChatOpenAI, OpenAIEmbeddings } from "@langchain/openai";
+import { AzureCosmosDBNoSQLSemanticCache } from "../caches.js";
+
+const DATABASE_NAME = "langchainTestCacheDB";
+const CONTAINER_NAME = "testContainer";
+
+function indexingPolicy(indexType: any) {
+  return {
+    indexingMode: IndexingMode.consistent,
+    includedPaths: [{ path: "/*" }],
+    excludedPaths: [{ path: '/"_etag"/?' }],
+    vectorIndexes: [{ path: "/embedding", type: indexType }],
+  };
+}
+
+function vectorEmbeddingPolicy(
+  distanceFunction: "euclidean" | "cosine" | "dotproduct",
+  dimension: number
+): VectorEmbeddingPolicy {
+  return {
+    vectorEmbeddings: [
+      {
+        path: "/embedding",
+        dataType: "float32",
+        distanceFunction,
+        dimensions: dimension,
+      },
+    ],
+  };
+}
+
+async function initializeCache(
+  indexType: any,
+  distanceFunction: any,
+  similarityThreshold?: number
+): Promise<AzureCosmosDBNoSQLSemanticCache> {
+  let cache: AzureCosmosDBNoSQLSemanticCache;
+  const embeddingModel = new OpenAIEmbeddings();
+  const testEmbedding = await embeddingModel.embedDocuments(["sample text"]);
+  const dimension = Math.min(
+    testEmbedding[0].length,
+    indexType === "flat" ? 505 : 4096
+  );
+  if (process.env.AZURE_COSMOSDB_NOSQL_CONNECTION_STRING) {
+    cache = new AzureCosmosDBNoSQLSemanticCache(
+      new OpenAIEmbeddings(),
+      {
+        databaseName: DATABASE_NAME,
+        containerName: CONTAINER_NAME,
+        connectionString: process.env.AZURE_COSMOSDB_NOSQL_CONNECTION_STRING,
+        indexingPolicy: indexingPolicy(indexType),
+        vectorEmbeddingPolicy: vectorEmbeddingPolicy(
+          distanceFunction,
+          dimension
+        ),
+      },
+      similarityThreshold
+    );
+  } else if (process.env.AZURE_COSMOSDB_NOSQL_ENDPOINT) {
+    cache = new AzureCosmosDBNoSQLSemanticCache(
+      new OpenAIEmbeddings(),
+      {
+        databaseName: DATABASE_NAME,
+        containerName: CONTAINER_NAME,
+        endpoint: process.env.AZURE_COSMOSDB_NOSQL_ENDPOINT,
+        indexingPolicy: indexingPolicy(indexType),
+        vectorEmbeddingPolicy: vectorEmbeddingPolicy(
+          distanceFunction,
+          dimension
+        ),
+      },
+      similarityThreshold
+    );
+  } else {
+    throw new Error(
+      "Please set the environment variable AZURE_COSMOSDB_NOSQL_CONNECTION_STRING or AZURE_COSMOSDB_NOSQL_ENDPOINT"
+    );
+  }
+  return cache;
+}
+
+/*
+ * To run this test, you need to have an Azure Cosmos DB for NoSQL instance
+ * running. You can deploy a free version on Azure Portal without any cost,
+ * following this guide:
+ * https://learn.microsoft.com/azure/cosmos-db/nosql/vector-search
+ *
+ * You do not need to create a database or container; they will be created
+ * automatically by the test.
+ *
+ * Once you have the instance running, you need to set the following environment
+ * variables before running the test:
+ * - AZURE_COSMOSDB_NOSQL_CONNECTION_STRING or AZURE_COSMOSDB_NOSQL_ENDPOINT
+ * - AZURE_OPENAI_API_KEY
+ * - AZURE_OPENAI_API_INSTANCE_NAME
+ * - AZURE_OPENAI_API_EMBEDDINGS_DEPLOYMENT_NAME
+ * - AZURE_OPENAI_API_VERSION
+ */
+describe("Azure CosmosDB NoSQL Semantic Cache", () => {
+  beforeEach(async () => {
+    let client: CosmosClient;
+
+    if (process.env.AZURE_COSMOSDB_NOSQL_CONNECTION_STRING) {
+      client = new CosmosClient(
+        process.env.AZURE_COSMOSDB_NOSQL_CONNECTION_STRING
+      );
+    } else if (process.env.AZURE_COSMOSDB_NOSQL_ENDPOINT) {
+      client = new CosmosClient({
+        endpoint: process.env.AZURE_COSMOSDB_NOSQL_ENDPOINT,
+        aadCredentials: new DefaultAzureCredential(),
+      });
+    } else {
+      throw new Error(
+        "Please set the environment variable AZURE_COSMOSDB_NOSQL_CONNECTION_STRING or AZURE_COSMOSDB_NOSQL_ENDPOINT"
+      );
+    }
+
+    // Make sure the database does not exist
+    try {
+      await client.database(DATABASE_NAME).delete();
+    } catch {
+      // Ignore error if the database does not exist
+    }
+  });
+
+  it("test AzureCosmosDBNoSqlSemanticCache with cosine quantizedFlat", async () => {
+    const cache = await initializeCache("quantizedFlat", "cosine");
+    const model = new ChatOpenAI({ cache });
+    const llmString = JSON.stringify(model._identifyingParams);
+    await cache.update("foo", llmString, [{ text: "fizz" }]);
+
+    let cacheOutput = await cache.lookup("foo", llmString);
+    expect(cacheOutput).toEqual([{ text: "fizz" }]);
+
+    cacheOutput = await cache.lookup("bar", llmString);
+    expect(cacheOutput).toEqual(null);
+
+    await cache.clear(llmString);
+  });
+
+  it("test AzureCosmosDBNoSqlSemanticCache with cosine flat", async () => {
+    const cache = await initializeCache("flat", "cosine");
+    const model = new ChatOpenAI({ cache });
+    const llmString = JSON.stringify(model._identifyingParams);
+    await cache.update("foo", llmString, [{ text: "fizz" }]);
+
+    let cacheOutput = await cache.lookup("foo", llmString);
+    expect(cacheOutput).toEqual([{ text: "fizz" }]);
+
+    cacheOutput = await cache.lookup("bar", llmString);
+    expect(cacheOutput).toEqual(null);
+
+    await cache.clear(llmString);
+  });
+
+  it("test AzureCosmosDBNoSqlSemanticCache with dotProduct quantizedFlat", async () => {
+    const cache = await initializeCache("quantizedFlat", "dotproduct");
+    const model = new ChatOpenAI({ cache });
+    const llmString = JSON.stringify(model._identifyingParams);
+    await cache.update("foo", llmString, [{ text: "fizz" }]);
+
+    let cacheOutput = await cache.lookup("foo", llmString);
+    expect(cacheOutput).toEqual([{ text: "fizz" }]);
+
+    cacheOutput = await cache.lookup("bar", llmString);
+    expect(cacheOutput).toEqual(null);
+
+    await cache.clear(llmString);
+  });
+
+  it("test AzureCosmosDBNoSqlSemanticCache with dotProduct flat", async () => {
+    const cache = await initializeCache("flat", "dotproduct");
+    const model = new ChatOpenAI({ cache });
+    const llmString = JSON.stringify(model._identifyingParams);
+    await cache.update("foo", llmString, [{ text: "fizz" }]);
+
+    let cacheOutput = await cache.lookup("foo", llmString);
+    expect(cacheOutput).toEqual([{ text: "fizz" }]);
+
+    cacheOutput = await cache.lookup("bar", llmString);
+    expect(cacheOutput).toEqual(null);
+
+    await cache.clear(llmString);
+  });
+
+  it("test AzureCosmosDBNoSqlSemanticCache with euclidean quantizedFlat", async () => {
+    const cache = await initializeCache("quantizedFlat", "euclidean");
+    const model = new ChatOpenAI({ cache });
+    const llmString = JSON.stringify(model._identifyingParams);
+    await cache.update("foo", llmString, [{ text: "fizz" }]);
+
+    let cacheOutput = await cache.lookup("foo", llmString);
+    expect(cacheOutput).toEqual([{ text: "fizz" }]);
+
+    cacheOutput = await cache.lookup("bar", llmString);
+    expect(cacheOutput).toEqual(null);
+
+    await cache.clear(llmString);
+  });
+
+  it("test AzureCosmosDBNoSqlSemanticCache with euclidean flat", async () => {
+    const cache = await initializeCache("flat", "euclidean");
+    const model = new ChatOpenAI({ cache });
+    const llmString = JSON.stringify(model._identifyingParams);
+    await cache.update("foo", llmString, [{ text: "fizz" }]);
+
+    let cacheOutput = await cache.lookup("foo", llmString);
+    expect(cacheOutput).toEqual([{ text: "fizz" }]);
+
+    cacheOutput = await cache.lookup("bar", llmString);
+    expect(cacheOutput).toEqual(null);
+
+    await cache.clear(llmString);
+  });
+
+  it("test AzureCosmosDBNoSqlSemanticCache response according to similarity score", async () => {
+    const cache = await initializeCache("quantizedFlat", "cosine");
+    const model = new ChatOpenAI({ cache });
+    const response1 = await model.invoke(
+      "Where is the headquarter of Microsoft?"
+    );
+    console.log(response1.content);
+    // gives similarity score of 0.56 which is less than the threshold of 0.6. The cache
+    // will return null, which allows the model to generate a new result.
+    const response2 = await model.invoke(
+      "List all Microsoft offices in India."
+    );
+    expect(response2.content).not.toEqual(response1.content);
+    console.log(response2.content);
+    // gives similarity score of 0.63 > 0.6, so the cached response is returned
+    const response3 = await model.invoke("Tell me something about Microsoft");
+    expect(response3.content).toEqual(response1.content);
+    console.log(response3.content);
+  });
+});
diff --git a/libs/langchain-azure-cosmosdb/src/tests/caches.test.ts b/libs/langchain-azure-cosmosdb/src/tests/caches.test.ts
new file mode 100644
index 000000000000..9de3f507acc0
--- /dev/null
+++ b/libs/langchain-azure-cosmosdb/src/tests/caches.test.ts
@@ -0,0 +1,67 @@
+/* eslint-disable @typescript-eslint/no-explicit-any */
+import { jest } from "@jest/globals";
+import { FakeEmbeddings, FakeLLM } from "@langchain/core/utils/testing";
+import { AzureCosmosDBNoSQLSemanticCache } from "../index.js";
+
+// Create the mock Cosmos DB client
+const createMockClient = () => {
+  let id = 0;
+  const client = {
+    databases: {
+      createIfNotExists: jest.fn().mockReturnThis(),
+      get database() {
+        return this;
+      },
+      containers: {
+        createIfNotExists: jest.fn().mockReturnThis(),
+        get container() {
+          return this;
+        },
+        items: {
+          create: jest.fn().mockImplementation((doc: any) => ({
+            // eslint-disable-next-line no-plusplus
+            resource: { id: doc.id ?? `${id++}` },
+          })),
+          query: jest.fn().mockReturnThis(),
+          fetchAll: jest.fn().mockImplementation(() => ({
+            resources: [
+              {
+                metadata: {
+                  return_value: ['{"text": "fizz"}'], // Simulate stored serialized generation
+                },
+                similarityScore: 0.8,
+              },
+            ],
+          })),
+        },
+        item: jest.fn().mockReturnThis(),
+        delete: jest.fn(),
+      },
+    },
+  };
+  return client;
+};
+
+describe("AzureCosmosDBNoSQLSemanticCache", () => {
+  it("should store, retrieve, and clear cache", async () => {
+    const client = createMockClient();
+    const embeddings = new FakeEmbeddings();
+    const cache = new AzureCosmosDBNoSQLSemanticCache(embeddings, {
+      client: client as any,
+    });
+    expect(cache).toBeDefined();
+
+    const llm = new FakeLLM({});
+    const llmString = JSON.stringify(llm._identifyingParams());
+
+    await cache.update("foo", llmString, [{ text: "fizz" }]);
+    expect(client.databases.containers.items.create).toHaveBeenCalled();
+
+    const result = await cache.lookup("foo", llmString);
+    expect(result).toEqual([{ text: "fizz" }]);
+    expect(client.databases.containers.items.query).toHaveBeenCalled();
+
+    await cache.clear(llmString);
+    expect(client.databases.containers.delete).toHaveBeenCalled();
+  });
+});