diff --git a/libs/langchain-aws/src/tests/chat_models.int.test.ts b/libs/langchain-aws/src/tests/chat_models.int.test.ts
index bd8d547b90c2..fda040eb56ff 100644
--- a/libs/langchain-aws/src/tests/chat_models.int.test.ts
+++ b/libs/langchain-aws/src/tests/chat_models.int.test.ts
@@ -1,10 +1,14 @@
 /* eslint-disable no-process-env */
+
 import { test, expect } from "@jest/globals";
 import { AIMessageChunk, HumanMessage } from "@langchain/core/messages";
 import { tool } from "@langchain/core/tools";
 import { z } from "zod";
 import { ChatBedrockConverse } from "../chat_models.js";
 
+// Save the original value of the 'LANGCHAIN_CALLBACKS_BACKGROUND' environment variable
+const originalBackground = process.env.LANGCHAIN_CALLBACKS_BACKGROUND;
+
 const baseConstructorArgs: Partial<
   ConstructorParameters<typeof ChatBedrockConverse>[0]
 > = {
@@ -44,28 +48,38 @@ test("Test ChatBedrockConverse stream method", async () => {
 });
 
 test("Test ChatBedrockConverse in streaming mode", async () => {
-  let nrNewTokens = 0;
-  let streamedCompletion = "";
+  // Running LangChain callbacks in the background will sometimes cause the callbackManager to execute
+  // after the test/llm call has already finished & returned. Set that environment variable to false
+  // to prevent that from happening.
+  process.env.LANGCHAIN_CALLBACKS_BACKGROUND = "false";
 
-  const model = new ChatBedrockConverse({
-    ...baseConstructorArgs,
-    streaming: true,
-    maxTokens: 10,
-    callbacks: [
-      {
-        async handleLLMNewToken(token: string) {
-          nrNewTokens += 1;
-          streamedCompletion += token;
+  try {
+    let nrNewTokens = 0;
+    let streamedCompletion = "";
+
+    const model = new ChatBedrockConverse({
+      ...baseConstructorArgs,
+      streaming: true,
+      maxTokens: 10,
+      callbacks: [
+        {
+          async handleLLMNewToken(token: string) {
+            nrNewTokens += 1;
+            streamedCompletion += token;
+          },
         },
-      },
-    ],
-  });
-  const message = new HumanMessage("Hello!");
-  const result = await model.invoke([message]);
-  console.log(result);
+      ],
+    });
+    const message = new HumanMessage("Hello!");
+    const result = await model.invoke([message]);
+    console.log(result);
 
-  expect(nrNewTokens > 0).toBe(true);
-  expect(result.content).toBe(streamedCompletion);
+    expect(nrNewTokens > 0).toBe(true);
+    expect(result.content).toBe(streamedCompletion);
+  } finally {
+    // Reset the environment variable
+    process.env.LANGCHAIN_CALLBACKS_BACKGROUND = originalBackground;
+  }
 }, 10000);
 
 test("Test ChatBedrockConverse with stop", async () => {
diff --git a/libs/langchain-cloudflare/src/tests/chat_models.int.test.ts b/libs/langchain-cloudflare/src/tests/chat_models.int.test.ts
index 4d97f5d5fc5d..201a515a933f 100644
--- a/libs/langchain-cloudflare/src/tests/chat_models.int.test.ts
+++ b/libs/langchain-cloudflare/src/tests/chat_models.int.test.ts
@@ -1,3 +1,5 @@
+/* eslint-disable no-process-env */
+
 import { describe, test } from "@jest/globals";
 import { ChatMessage, HumanMessage } from "@langchain/core/messages";
 import {
@@ -10,6 +12,9 @@ import {
 import { getEnvironmentVariable } from "@langchain/core/utils/env";
 import { ChatCloudflareWorkersAI } from "../chat_models.js";
 
+// Save the original value of the 'LANGCHAIN_CALLBACKS_BACKGROUND' environment variable
+const originalBackground = process.env.LANGCHAIN_CALLBACKS_BACKGROUND;
+
 describe("ChatCloudflareWorkersAI", () => {
   test("call", async () => {
     const chat = new ChatCloudflareWorkersAI();
@@ -26,22 +31,32 @@ describe("ChatCloudflareWorkersAI", () => {
   });
 
   test("generate with streaming true", async () => {
-    const chat = new ChatCloudflareWorkersAI({
-      streaming: true,
-    });
-    const message = new HumanMessage("What is 2 + 2?");
-    const tokens: string[] = [];
-    const res = await chat.generate([[message]], {
-      callbacks: [
-        {
-          handleLLMNewToken: (token) => {
-            tokens.push(token);
+    // Running LangChain callbacks in the background will sometimes cause the callbackManager to execute
+    // after the test/llm call has already finished & returned. Set that environment variable to false
+    // to prevent that from happening.
+    process.env.LANGCHAIN_CALLBACKS_BACKGROUND = "false";
+
+    try {
+      const chat = new ChatCloudflareWorkersAI({
+        streaming: true,
+      });
+      const message = new HumanMessage("What is 2 + 2?");
+      const tokens: string[] = [];
+      const res = await chat.generate([[message]], {
+        callbacks: [
+          {
+            handleLLMNewToken: (token) => {
+              tokens.push(token);
+            },
           },
-        },
-      ],
-    });
-    expect(tokens.length).toBeGreaterThan(1);
-    expect(tokens.join("")).toEqual(res.generations[0][0].text);
+        ],
+      });
+      expect(tokens.length).toBeGreaterThan(1);
+      expect(tokens.join("")).toEqual(res.generations[0][0].text);
+    } finally {
+      // Reset the environment variable
+      process.env.LANGCHAIN_CALLBACKS_BACKGROUND = originalBackground;
+    }
   });
 
   test("stream", async () => {
diff --git a/libs/langchain-cloudflare/src/tests/llms.int.test.ts b/libs/langchain-cloudflare/src/tests/llms.int.test.ts
index 0b8d5ca06cca..18c4665cb059 100644
--- a/libs/langchain-cloudflare/src/tests/llms.int.test.ts
+++ b/libs/langchain-cloudflare/src/tests/llms.int.test.ts
@@ -1,7 +1,12 @@
+/* eslint-disable no-process-env */
+
 import { test } from "@jest/globals";
 import { getEnvironmentVariable } from "@langchain/core/utils/env";
 import { CloudflareWorkersAI } from "../llms.js";
 
+// Save the original value of the 'LANGCHAIN_CALLBACKS_BACKGROUND' environment variable
+const originalBackground = process.env.LANGCHAIN_CALLBACKS_BACKGROUND;
+
 test("Test CloudflareWorkersAI", async () => {
   const model = new CloudflareWorkersAI({});
   const res = await model.invoke("1 + 1 =");
@@ -9,22 +14,32 @@ test("Test CloudflareWorkersAI", async () => {
 }, 50000);
 
 test("generate with streaming true", async () => {
-  const model = new CloudflareWorkersAI({
-    streaming: true,
-  });
-  const tokens: string[] = [];
-  const res = await model.invoke("What is 2 + 2?", {
-    callbacks: [
-      {
-        handleLLMNewToken: (token) => {
-          console.log(token);
-          tokens.push(token);
+  // Running LangChain callbacks in the background will sometimes cause the callbackManager to execute
+  // after the test/llm call has already finished & returned. Set that environment variable to false
+  // to prevent that from happening.
+  process.env.LANGCHAIN_CALLBACKS_BACKGROUND = "false";
+
+  try {
+    const model = new CloudflareWorkersAI({
+      streaming: true,
+    });
+    const tokens: string[] = [];
+    const res = await model.invoke("What is 2 + 2?", {
+      callbacks: [
+        {
+          handleLLMNewToken: (token) => {
+            console.log(token);
+            tokens.push(token);
+          },
         },
-      },
-    ],
-  });
-  expect(tokens.length).toBeGreaterThan(1);
-  expect(tokens.join("")).toEqual(res);
+      ],
+    });
+    expect(tokens.length).toBeGreaterThan(1);
+    expect(tokens.join("")).toEqual(res);
+  } finally {
+    // Reset the environment variable
+    process.env.LANGCHAIN_CALLBACKS_BACKGROUND = originalBackground;
+  }
 });
 
 test("Test CloudflareWorkersAI streaming", async () => {
diff --git a/libs/langchain-cohere/src/tests/llms.int.test.ts b/libs/langchain-cohere/src/tests/llms.int.test.ts
index 11f0858666b2..69a115a4e458 100644
--- a/libs/langchain-cohere/src/tests/llms.int.test.ts
+++ b/libs/langchain-cohere/src/tests/llms.int.test.ts
@@ -1,7 +1,11 @@
-/* eslint-disable no-promise-executor-return */
+/* eslint-disable no-promise-executor-return, no-process-env */
+
 import { test } from "@jest/globals";
 import { Cohere } from "../llms.js";
 
+// Save the original value of the 'LANGCHAIN_CALLBACKS_BACKGROUND' environment variable
+const originalBackground = process.env.LANGCHAIN_CALLBACKS_BACKGROUND;
+
 test("test invoke", async () => {
   const cohere = new Cohere({});
   const result = await cohere.invoke(
@@ -11,25 +15,35 @@ test("test invoke", async () => {
 });
 
 test("test invoke with callback", async () => {
-  const cohere = new Cohere({
-    model: "command-light",
-  });
-  const tokens: string[] = [];
-  const result = await cohere.invoke(
-    "What is a good name for a company that makes colorful socks?",
-    {
-      callbacks: [
-        {
-          handleLLMNewToken(token) {
-            tokens.push(token);
+  // Running LangChain callbacks in the background will sometimes cause the callbackManager to execute
+  // after the test/llm call has already finished & returned. Set that environment variable to false
+  // to prevent that from happening.
+  process.env.LANGCHAIN_CALLBACKS_BACKGROUND = "false";
+
+  try {
+    const cohere = new Cohere({
+      model: "command-light",
+    });
+    const tokens: string[] = [];
+    const result = await cohere.invoke(
+      "What is a good name for a company that makes colorful socks?",
+      {
+        callbacks: [
+          {
+            handleLLMNewToken(token) {
+              tokens.push(token);
+            },
           },
-        },
-      ],
-    }
-  );
-  // Not streaming, so we should only get one token
-  expect(tokens.length).toBe(1);
-  expect(result).toEqual(tokens.join(""));
+        ],
+      }
+    );
+    // Not streaming, so we should only get one token
+    expect(tokens.length).toBe(1);
+    expect(result).toEqual(tokens.join(""));
+  } finally {
+    // Reset the environment variable
+    process.env.LANGCHAIN_CALLBACKS_BACKGROUND = originalBackground;
+  }
 });
 
 test("should abort the request", async () => {
diff --git a/libs/langchain-google-genai/src/tests/chat_models.int.test.ts b/libs/langchain-google-genai/src/tests/chat_models.int.test.ts
index ef94f39594e2..5365ab09cc80 100644
--- a/libs/langchain-google-genai/src/tests/chat_models.int.test.ts
+++ b/libs/langchain-google-genai/src/tests/chat_models.int.test.ts
@@ -1,3 +1,5 @@
+/* eslint-disable no-process-env */
+
 import { test } from "@jest/globals";
 import * as fs from "node:fs/promises";
 import { fileURLToPath } from "node:url";
@@ -18,6 +20,9 @@ import { z } from "zod";
 import { FunctionDeclarationSchemaType } from "@google/generative-ai";
 import { ChatGoogleGenerativeAI } from "../chat_models.js";
 
+// Save the original value of the 'LANGCHAIN_CALLBACKS_BACKGROUND' environment variable
+const originalBackground = process.env.LANGCHAIN_CALLBACKS_BACKGROUND;
+
 const dummyToolResponse = `[{"title":"Weather in New York City","url":"https://www.weatherapi.com/","content":"{'location': {'name': 'New York', 'region': 'New York', 'country': 'United States of America', 'lat': 40.71, 'lon': -74.01, 'tz_id': 'America/New_York', 'localtime_epoch': 1718659486, 'localtime': '2024-06-17 17:24'}, 'current': {'last_updated_epoch': 1718658900, 'last_updated': '2024-06-17 17:15', 'temp_c': 27.8, 'temp_f': 82.0, 'is_day': 1, 'condition': {'text': 'Partly cloudy', 'icon': '//cdn.weatherapi.com/weather/64x64/day/116.png', 'code': 1003}, 'wind_mph': 2.2, 'wind_kph': 3.6, 'wind_degree': 159, 'wind_dir': 'SSE', 'pressure_mb': 1021.0, 'pressure_in': 30.15, 'precip_mm': 0.0, 'precip_in': 0.0, 'humidity': 58, 'cloud': 25, 'feelslike_c': 29.0, 'feelslike_f': 84.2, 'windchill_c': 26.9, 'windchill_f': 80.5, 'heatindex_c': 27.9, 'heatindex_f': 82.2, 'dewpoint_c': 17.1, 'dewpoint_f': 62.8, 'vis_km': 16.0, 'vis_miles': 9.0, 'uv': 7.0, 'gust_mph': 18.3, 'gust_kph': 29.4}}","score":0.98192,"raw_content":null},{"title":"New York, NY Monthly Weather | AccuWeather","url":"https://www.accuweather.com/en/us/new-york/10021/june-weather/349727","content":"Get the monthly weather forecast for New York, NY, including daily high/low, historical averages, to help you plan ahead.","score":0.97504,"raw_content":null}]`;
 
 test("Test Google AI", async () => {
@@ -90,62 +95,92 @@ test("Test Google AI multimodal generation", async () => {
 });
 
 test("Test Google AI handleLLMNewToken callback", async () => {
-  const model = new ChatGoogleGenerativeAI({});
-  let tokens = "";
-  const res = await model.call(
-    [new HumanMessage("what is 1 + 1?")],
-    undefined,
-    [
-      {
-        handleLLMNewToken(token: string) {
-          tokens += token;
+  // Running LangChain callbacks in the background will sometimes cause the callbackManager to execute
+  // after the test/llm call has already finished & returned. Set that environment variable to false
+  // to prevent that from happening.
+  process.env.LANGCHAIN_CALLBACKS_BACKGROUND = "false";
+
+  try {
+    const model = new ChatGoogleGenerativeAI({});
+    let tokens = "";
+    const res = await model.call(
+      [new HumanMessage("what is 1 + 1?")],
+      undefined,
+      [
+        {
+          handleLLMNewToken(token: string) {
+            tokens += token;
+          },
         },
-      },
-    ]
-  );
-  console.log({ tokens });
-  const responseContent = typeof res.content === "string" ? res.content : "";
-  expect(tokens).toBe(responseContent);
+      ]
+    );
+    console.log({ tokens });
+    const responseContent = typeof res.content === "string" ? res.content : "";
+    expect(tokens).toBe(responseContent);
+  } finally {
+    // Reset the environment variable
+    process.env.LANGCHAIN_CALLBACKS_BACKGROUND = originalBackground;
+  }
 });
 
 test("Test Google AI handleLLMNewToken callback with streaming", async () => {
-  const model = new ChatGoogleGenerativeAI({});
-  let tokens = "";
-  const res = await model.stream([new HumanMessage("what is 1 + 1?")], {
-    callbacks: [
-      {
-        handleLLMNewToken(token: string) {
-          tokens += token;
+  // Running LangChain callbacks in the background will sometimes cause the callbackManager to execute
+  // after the test/llm call has already finished & returned. Set that environment variable to false
+  // to prevent that from happening.
+  process.env.LANGCHAIN_CALLBACKS_BACKGROUND = "false";
+
+  try {
+    const model = new ChatGoogleGenerativeAI({});
+    let tokens = "";
+    const res = await model.stream([new HumanMessage("what is 1 + 1?")], {
+      callbacks: [
+        {
+          handleLLMNewToken(token: string) {
+            tokens += token;
+          },
         },
-      },
-    ],
-  });
-  console.log({ tokens });
-  let responseContent = "";
-  for await (const streamItem of res) {
-    responseContent += streamItem.content;
+      ],
+    });
+    console.log({ tokens });
+    let responseContent = "";
+    for await (const streamItem of res) {
+      responseContent += streamItem.content;
+    }
+    console.log({ tokens });
+    expect(tokens).toBe(responseContent);
+  } finally {
+    // Reset the environment variable
+    process.env.LANGCHAIN_CALLBACKS_BACKGROUND = originalBackground;
   }
-  console.log({ tokens });
-  expect(tokens).toBe(responseContent);
 });
 
 test("Test Google AI in streaming mode", async () => {
-  const model = new ChatGoogleGenerativeAI({ streaming: true });
-  let tokens = "";
-  let nrNewTokens = 0;
-  const res = await model.invoke([new HumanMessage("Write a haiku?")], {
-    callbacks: [
-      {
-        handleLLMNewToken(token: string) {
-          nrNewTokens += 1;
-          tokens += token;
+  // Running LangChain callbacks in the background will sometimes cause the callbackManager to execute
+  // after the test/llm call has already finished & returned. Set that environment variable to false
+  // to prevent that from happening.
+  process.env.LANGCHAIN_CALLBACKS_BACKGROUND = "false";
+
+  try {
+    const model = new ChatGoogleGenerativeAI({ streaming: true });
+    let tokens = "";
+    let nrNewTokens = 0;
+    const res = await model.invoke([new HumanMessage("Write a haiku?")], {
+      callbacks: [
+        {
+          handleLLMNewToken(token: string) {
+            nrNewTokens += 1;
+            tokens += token;
+          },
        },
-      },
-    ],
-  });
-  console.log({ tokens, nrNewTokens });
-  expect(nrNewTokens > 1).toBe(true);
-  expect(res.content).toBe(tokens);
+      ],
+    });
+    console.log({ tokens, nrNewTokens });
+    expect(nrNewTokens > 1).toBe(true);
+    expect(res.content).toBe(tokens);
+  } finally {
+    // Reset the environment variable
+    process.env.LANGCHAIN_CALLBACKS_BACKGROUND = originalBackground;
+  }
 });
 
 async function fileToBase64(filePath: string): Promise<string> {
diff --git a/libs/langchain-openai/src/tests/azure/llms.int.test.ts b/libs/langchain-openai/src/tests/azure/llms.int.test.ts
index c4c1baff9d62..fa91c27e5dc4 100644
--- a/libs/langchain-openai/src/tests/azure/llms.int.test.ts
+++ b/libs/langchain-openai/src/tests/azure/llms.int.test.ts
@@ -1,3 +1,5 @@
+/* eslint-disable no-process-env */
+
 import { test, expect } from "@jest/globals";
 import { LLMResult } from "@langchain/core/outputs";
 import { StringPromptValue } from "@langchain/core/prompt_values";
@@ -10,6 +12,9 @@ import {
 import { getEnvironmentVariable } from "@langchain/core/utils/env";
 import { AzureOpenAI } from "../../azure/llms.js";
 
+// Save the original value of the 'LANGCHAIN_CALLBACKS_BACKGROUND' environment variable
+const originalBackground = process.env.LANGCHAIN_CALLBACKS_BACKGROUND;
+
 test("Test Azure OpenAI invoke", async () => {
   const model = new AzureOpenAI({
     maxTokens: 5,
@@ -136,25 +141,35 @@ test("Test Azure OpenAI with versioned instruct model returns Azure OpenAI", asy
 });
 
 test("Test Azure OpenAI tokenUsage", async () => {
-  let tokenUsage = {
-    completionTokens: 0,
-    promptTokens: 0,
-    totalTokens: 0,
-  };
+  // Running LangChain callbacks in the background will sometimes cause the callbackManager to execute
+  // after the test/llm call has already finished & returned. Set that environment variable to false
+  // to prevent that from happening.
+  process.env.LANGCHAIN_CALLBACKS_BACKGROUND = "false";
+
+  try {
+    let tokenUsage = {
+      completionTokens: 0,
+      promptTokens: 0,
+      totalTokens: 0,
+    };
 
-  const model = new AzureOpenAI({
-    maxTokens: 5,
-    modelName: "gpt-3.5-turbo-instruct",
-    callbackManager: CallbackManager.fromHandlers({
-      async handleLLMEnd(output: LLMResult) {
-        tokenUsage = output.llmOutput?.tokenUsage;
-      },
-    }),
-  });
-  const res = await model.invoke("Hello");
-  console.log({ res });
+    const model = new AzureOpenAI({
+      maxTokens: 5,
+      modelName: "gpt-3.5-turbo-instruct",
+      callbackManager: CallbackManager.fromHandlers({
+        async handleLLMEnd(output: LLMResult) {
+          tokenUsage = output.llmOutput?.tokenUsage;
+        },
+      }),
+    });
+    const res = await model.invoke("Hello");
+    console.log({ res });
 
-  expect(tokenUsage.promptTokens).toBe(1);
+    expect(tokenUsage.promptTokens).toBe(1);
+  } finally {
+    // Reset the environment variable
+    process.env.LANGCHAIN_CALLBACKS_BACKGROUND = originalBackground;
+  }
 });
 
 test("Test Azure OpenAI in streaming mode", async () => {
diff --git a/libs/langchain-openai/src/tests/legacy.int.test.ts b/libs/langchain-openai/src/tests/legacy.int.test.ts
index 94fda49dbe41..533a83fa488c 100644
--- a/libs/langchain-openai/src/tests/legacy.int.test.ts
+++ b/libs/langchain-openai/src/tests/legacy.int.test.ts
@@ -1,7 +1,12 @@
+/* eslint-disable no-process-env */
+
 import { expect, test } from "@jest/globals";
 import { CallbackManager } from "@langchain/core/callbacks/manager";
 import { OpenAIChat } from "../legacy.js";
 
+// Save the original value of the 'LANGCHAIN_CALLBACKS_BACKGROUND' environment variable
+const originalBackground = process.env.LANGCHAIN_CALLBACKS_BACKGROUND;
+
 test("Test OpenAI", async () => {
   const model = new OpenAIChat({ modelName: "gpt-3.5-turbo", maxTokens: 10 });
   const res = await model.invoke("Print hello world");
@@ -21,25 +26,35 @@ test("Test OpenAI with prefix messages", async () => {
 });
 
 test("Test OpenAI in streaming mode", async () => {
-  let nrNewTokens = 0;
-  let streamedCompletion = "";
-
-  const model = new OpenAIChat({
-    maxTokens: 10,
-    modelName: "gpt-3.5-turbo",
-    streaming: true,
-    callbackManager: CallbackManager.fromHandlers({
-      async handleLLMNewToken(token: string) {
-        nrNewTokens += 1;
-        streamedCompletion += token;
-      },
-    }),
-  });
-  const res = await model.invoke("Print hello world");
-  console.log({ res });
-
-  expect(nrNewTokens > 0).toBe(true);
-  expect(res).toBe(streamedCompletion);
+  // Running LangChain callbacks in the background will sometimes cause the callbackManager to execute
+  // after the test/llm call has already finished & returned. Set that environment variable to false
+  // to prevent that from happening.
+  process.env.LANGCHAIN_CALLBACKS_BACKGROUND = "false";
+
+  try {
+    let nrNewTokens = 0;
+    let streamedCompletion = "";
+
+    const model = new OpenAIChat({
+      maxTokens: 10,
+      modelName: "gpt-3.5-turbo",
+      streaming: true,
+      callbackManager: CallbackManager.fromHandlers({
+        async handleLLMNewToken(token: string) {
+          nrNewTokens += 1;
+          streamedCompletion += token;
+        },
+      }),
+    });
+    const res = await model.invoke("Print hello world");
+    console.log({ res });
+
+    expect(nrNewTokens > 0).toBe(true);
+    expect(res).toBe(streamedCompletion);
+  } finally {
+    // Reset the environment variable
+    process.env.LANGCHAIN_CALLBACKS_BACKGROUND = originalBackground;
+  }
 }, 30000);
 
 test("Test OpenAI with stop", async () => {
diff --git a/libs/langchain-openai/src/tests/llms.int.test.ts b/libs/langchain-openai/src/tests/llms.int.test.ts
index 4704aedc3dc5..8706e1634c55 100644
--- a/libs/langchain-openai/src/tests/llms.int.test.ts
+++ b/libs/langchain-openai/src/tests/llms.int.test.ts
@@ -1,3 +1,5 @@
+/* eslint-disable no-process-env */
+
 import { test, expect } from "@jest/globals";
 import { LLMResult } from "@langchain/core/outputs";
 import { StringPromptValue } from "@langchain/core/prompt_values";
@@ -6,6 +8,9 @@ import { NewTokenIndices } from "@langchain/core/callbacks/base";
 import { OpenAIChat } from "../legacy.js";
 import { OpenAI } from "../llms.js";
 
+// Save the original value of the 'LANGCHAIN_CALLBACKS_BACKGROUND' environment variable
+const originalBackground = process.env.LANGCHAIN_CALLBACKS_BACKGROUND;
+
 test("Test OpenAI", async () => {
   const model = new OpenAI({
     maxTokens: 5,
@@ -140,25 +145,35 @@ test("Test OpenAI with versioned instruct model returns OpenAI", async () => {
 });
 
 test("Test ChatOpenAI tokenUsage", async () => {
-  let tokenUsage = {
-    completionTokens: 0,
-    promptTokens: 0,
-    totalTokens: 0,
-  };
+  // Running LangChain callbacks in the background will sometimes cause the callbackManager to execute
+  // after the test/llm call has already finished & returned. Set that environment variable to false
+  // to prevent that from happening.
+  process.env.LANGCHAIN_CALLBACKS_BACKGROUND = "false";
+
+  try {
+    let tokenUsage = {
+      completionTokens: 0,
+      promptTokens: 0,
+      totalTokens: 0,
+    };
 
-  const model = new OpenAI({
-    maxTokens: 5,
-    modelName: "gpt-3.5-turbo-instruct",
-    callbackManager: CallbackManager.fromHandlers({
-      async handleLLMEnd(output: LLMResult) {
-        tokenUsage = output.llmOutput?.tokenUsage;
-      },
-    }),
-  });
-  const res = await model.invoke("Hello");
-  console.log({ res });
+    const model = new OpenAI({
+      maxTokens: 5,
+      modelName: "gpt-3.5-turbo-instruct",
+      callbackManager: CallbackManager.fromHandlers({
+        async handleLLMEnd(output: LLMResult) {
+          tokenUsage = output.llmOutput?.tokenUsage;
+        },
+      }),
+    });
+    const res = await model.invoke("Hello");
+    console.log({ res });
 
-  expect(tokenUsage.promptTokens).toBe(1);
+    expect(tokenUsage.promptTokens).toBe(1);
+  } finally {
+    // Reset the environment variable
+    process.env.LANGCHAIN_CALLBACKS_BACKGROUND = originalBackground;
+  }
 });
 
 test("Test OpenAI in streaming mode", async () => {
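
Note: every test touched by this diff repeats the same save / set / try / finally dance around LANGCHAIN_CALLBACKS_BACKGROUND. A shared wrapper could encapsulate the pattern; the sketch below is not part of the diff, and the helper name withBackgroundCallbacksDisabled is invented for illustration. One subtlety it handles explicitly: in Node.js, assigning undefined to a process.env key stores the string "undefined", so the helper deletes the key when it was originally unset rather than assigning the saved value back blindly.

// A minimal sketch, assuming Node.js and the Jest globals already used above.
async function withBackgroundCallbacksDisabled<T>(
  fn: () => Promise<T>
): Promise<T> {
  // Save the original value so it can be restored afterwards.
  const original = process.env.LANGCHAIN_CALLBACKS_BACKGROUND;
  // Force callbacks to run inline so handlers finish before the call returns.
  process.env.LANGCHAIN_CALLBACKS_BACKGROUND = "false";
  try {
    return await fn();
  } finally {
    if (original === undefined) {
      // Assigning `undefined` would coerce to the string "undefined"; delete instead.
      delete process.env.LANGCHAIN_CALLBACKS_BACKGROUND;
    } else {
      process.env.LANGCHAIN_CALLBACKS_BACKGROUND = original;
    }
  }
}

// Usage: wrap a test body instead of repeating the try/finally in each test.
test("generate with streaming true", async () =>
  withBackgroundCallbacksDisabled(async () => {
    // ... test body as in the hunks above ...
  }));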