From a4fbe10652146fa478c2057cd696e58bbb705b55 Mon Sep 17 00:00:00 2001 From: Brace Sproul Date: Fri, 13 Oct 2023 11:42:33 -0700 Subject: [PATCH] Chore: conversational QA docs to use Runnables (#2890) * Chore: conversational QA docs to use Runnables * fix: drop externally managed memory section (first example does this) * feat: added new runnables example for built in memory and moved all old code examples to legacy * fix: add legacy streaming code examples file * feat: added streaming example * fix: better console log examples * Update docs/docs_skeleton/docs/modules/chains/popular/chat_vector_db_legacy.mdx * chore: lint files * fix: remove dive deeper * fix: removed run func wrapper, added real newlines to prompt template for streaming * fix: removed run func wrapper, added real newlines to prompt template for built in memory * fix: removed run func wrapper for base example * Reword explanation --------- Co-authored-by: jacoblee93 --- .../modules/chains/popular/chat_vector_db.mdx | 14 +- .../chains/popular/chat_vector_db_legacy.mdx | 15 ++ .../modules/chains/popular/chat_vector_db.mdx | 65 +----- .../chains/popular/chat_vector_db_legacy.mdx | 79 ++++++++ ...> conversation_qa_custom_prompt_legacy.ts} | 0 examples/src/chains/conversational_qa.ts | 121 ++++++++--- .../conversational_qa_built_in_memory.ts | 190 +++++++++++++++--- ...onversational_qa_built_in_memory_legacy.ts | 44 ++++ ...nversational_qa_external_memory_legacy.ts} | 0 .../src/chains/conversational_qa_legacy.ts | 38 ++++ .../src/chains/conversational_qa_streaming.ts | 137 ++++++++----- .../conversational_qa_streaming_legacy.ts | 52 +++++ 12 files changed, 589 insertions(+), 166 deletions(-) create mode 100644 docs/docs_skeleton/docs/modules/chains/popular/chat_vector_db_legacy.mdx create mode 100644 docs/snippets/modules/chains/popular/chat_vector_db_legacy.mdx rename examples/src/chains/{conversation_qa_custom_prompt.ts => conversation_qa_custom_prompt_legacy.ts} (100%) create mode 100644 
examples/src/chains/conversational_qa_built_in_memory_legacy.ts rename examples/src/chains/{conversational_qa_external_memory.ts => conversational_qa_external_memory_legacy.ts} (100%) create mode 100644 examples/src/chains/conversational_qa_legacy.ts create mode 100644 examples/src/chains/conversational_qa_streaming_legacy.ts diff --git a/docs/docs_skeleton/docs/modules/chains/popular/chat_vector_db.mdx b/docs/docs_skeleton/docs/modules/chains/popular/chat_vector_db.mdx index 5eb184025398..1adc551ac9ba 100644 --- a/docs/docs_skeleton/docs/modules/chains/popular/chat_vector_db.mdx +++ b/docs/docs_skeleton/docs/modules/chains/popular/chat_vector_db.mdx @@ -3,11 +3,19 @@ sidebar_position: 2 --- # Conversational Retrieval QA -The ConversationalRetrievalQA chain builds on RetrievalQAChain to provide a chat history component. -It first combines the chat history (either explicitly passed in or retrieved from the provided memory) and the question into a standalone question, then looks up relevant documents from the retriever, and finally passes those documents and the question to a question answering chain to return a response. +:::info +Looking for the older, non-LCEL version? Click [here](/docs/modules/chains/popular/chat_vector_db_legacy). +::: -To create one, you will need a retriever. In the below example, we will create one from a vector store, which can be created from embeddings. +A common requirement for retrieval-augmented generation chains is support for followup questions. +Followup questions can contain references to past chat history (e.g. "What did Biden say about Justice Breyer", followed by "Was that nice?"), which make them ill-suited +to direct retriever similarity search . + +To support followups, you can add an additional step prior to retrieval that combines the chat history (either explicitly passed in or retrieved from the provided memory) and the question into a standalone question. 
+It then performs the standard retrieval steps of looking up relevant documents from the retriever and passing those documents and the question into a question answering chain to return a response. + +To create a conversational question-answering chain, you will need a retriever. In the below example, we will create one from a vector store, which can be created from embeddings. import Example from "@snippets/modules/chains/popular/chat_vector_db.mdx" diff --git a/docs/docs_skeleton/docs/modules/chains/popular/chat_vector_db_legacy.mdx b/docs/docs_skeleton/docs/modules/chains/popular/chat_vector_db_legacy.mdx new file mode 100644 index 000000000000..56ffddce6dd9 --- /dev/null +++ b/docs/docs_skeleton/docs/modules/chains/popular/chat_vector_db_legacy.mdx @@ -0,0 +1,15 @@ +# Conversational Retrieval QA + +:::info +Looking for the LCEL version? Click [here](/docs/modules/chains/popular/chat_vector_db). +::: + +The ConversationalRetrievalQA chain builds on RetrievalQAChain to provide a chat history component. + +It first combines the chat history (either explicitly passed in or retrieved from the provided memory) and the question into a standalone question, then looks up relevant documents from the retriever, and finally passes those documents and the question to a question answering chain to return a response. + +To create one, you will need a retriever. In the below example, we will create one from a vector store, which can be created from embeddings. 
+ +import Example from "@snippets/modules/chains/popular/chat_vector_db.mdx" + + diff --git a/docs/snippets/modules/chains/popular/chat_vector_db.mdx b/docs/snippets/modules/chains/popular/chat_vector_db.mdx index 7901d33daa55..04953e25ccf8 100644 --- a/docs/snippets/modules/chains/popular/chat_vector_db.mdx +++ b/docs/snippets/modules/chains/popular/chat_vector_db.mdx @@ -1,36 +1,20 @@ import CodeBlock from "@theme/CodeBlock"; import ConvoRetrievalQAExample from "@examples/chains/conversational_qa.ts"; -import Example from "@examples/chains/conversational_qa.ts"; {ConvoRetrievalQAExample} -In the above code snippet, the fromLLM method of the `ConversationalRetrievalQAChain` class has the following signature: +Here's an explanation of each step in the `RunnableSequence.from()` call above: -```typescript -static fromLLM( - llm: BaseLanguageModel, - retriever: BaseRetriever, - options?: { - questionGeneratorChainOptions?: { - llm?: BaseLanguageModel; - template?: string; - }; - qaChainOptions?: QAChainParams; - returnSourceDocuments?: boolean; - } -): ConversationalRetrievalQAChain -``` +- The first input passed is an object containing a `question` key. This key is used as the main input for whatever question a user may ask. +- The next key is `chatHistory`. This is a string of all previous chats (human & AI) concatenated together. This is used to help the model understand the context of the question. +- The `context` key is used to fetch relevant documents from the loaded context (in this case the State Of The Union speech). It performs a call to the `getRelevantDocuments` method on the retriever, passing in the user's question as the query. We then pass it to our `serializeDocs` util which maps over all returned documents, joins them with newlines and returns a string. 
-Here's an explanation of each of the attributes of the options object: +After getting and formatting all inputs we pipe them through the following operations: +- `questionPrompt` - this is the prompt template which we pass to the model in the next step. Behind the scenes it's taking the inputs outlined above and formatting them into the proper spots outlined in our template. +- The formatted prompt with context then gets passed to the LLM and a response is generated. +- Finally, we pipe the result of the LLM call to an output parser which formats the response into a readable string. -- `questionGeneratorChainOptions`: An object that allows you to pass a custom template and LLM to the underlying question generation chain. - - If the template is provided, the `ConversationalRetrievalQAChain` will use this template to generate a question from the conversation context instead of using the question provided in the question parameter. - - Passing in a separate LLM (`llm`) here allows you to use a cheaper/faster model to create the condensed question while using a more powerful model for the final response, and can reduce unnecessary latency. -- `qaChainOptions`: Options that allow you to customize the specific QA chain used in the final step. The default is the [`StuffDocumentsChain`](/docs/modules/chains/document/stuff), but you can customize which chain is used by passing in a `type` parameter. - **Passing specific options here is completely optional**, but can be useful if you want to customize the way the response is presented to the end user, or if you have too many documents for the default `StuffDocumentsChain`. - You can see [the API reference of the usable fields here](/docs/api/chains/types/QAChainParams). 
In case you want to make chat_history available to the final answering `qaChain`, which ultimately answers the user question, you HAVE to pass a custom qaTemplate with chat_history as input, as it is not present in the default Template, which only gets passed `context` documents and generated `question`. -- `returnSourceDocuments`: A boolean value that indicates whether the `ConversationalRetrievalQAChain` should return the source documents that were used to retrieve the answer. If set to true, the documents will be included in the result returned by the call() method. This can be useful if you want to allow the user to see the sources used to generate the answer. If not set, the default value will be false. - - If you are using this option and passing in a memory instance, set `inputKey` and `outputKey` on the memory instance to the same values as the chain input and final conversational chain output. These default to `"question"` and `"text"` respectively, and specify the values that the memory should store. +Using this `RunnableSequence` we can pass questions, and chat history to the model for informed conversational question answering. ## Built-in Memory @@ -44,37 +28,8 @@ import ConvoQABuiltInExample from "@examples/chains/conversational_qa_built_in_m ## Streaming -You can also use the above concept of using two different LLMs to stream only the final response from the chain, and not output from the intermediate standalone question generation step. Here's an example: +You can also stream results from the chain. This is useful if you want to stream the output of the chain to a client, or if you want to stream the output of the chain to another chain. 
import ConvoQAStreamingExample from "@examples/chains/conversational_qa_streaming.ts"; {ConvoQAStreamingExample} - -## Externally-Managed Memory - -For this chain, if you'd like to format the chat history in a custom way (or pass in chat messages directly for convenience), you can also pass the chat history in explicitly by omitting the `memory` option and supplying -a `chat_history` string or array of [HumanMessages](/docs/api/schema/classes/HumanMessage) and [AIMessages](/docs/api/schema/classes/AIMessage) directly into the `chain.call` method: - -import ConvoQAExternalMemoryExample from "@examples/chains/conversational_qa_external_memory.ts"; - -{ConvoQAExternalMemoryExample} - -## Prompt Customization - -If you want to further change the chain's behavior, you can change the prompts for both the underlying question generation chain and the QA chain. - -One case where you might want to do this is to improve the chain's ability to answer meta questions about the chat history. -By default, the only input to the QA chain is the standalone question generated from the question generation chain. -This poses a challenge when asking meta questions about information in previous interactions from the chat history. - -For example, if you introduce a friend Bob and mention his age as 28, the chain is unable to provide his age upon asking a question like "How old is Bob?". -This limitation occurs because the bot searches for Bob in the vector store, rather than considering the message history. - -You can pass an alternative prompt for the question generation chain that also returns parts of the chat history relevant to the answer, -allowing the QA chain to answer meta questions with the additional context: - -import ConvoRetrievalQAWithCustomPrompt from "@examples/chains/conversation_qa_custom_prompt.ts"; - -{ConvoRetrievalQAWithCustomPrompt} - -Keep in mind that adding more context to the prompt in this way may distract the LLM from other relevant retrieved information. 
diff --git a/docs/snippets/modules/chains/popular/chat_vector_db_legacy.mdx b/docs/snippets/modules/chains/popular/chat_vector_db_legacy.mdx new file mode 100644 index 000000000000..fbbde4ea7fb2 --- /dev/null +++ b/docs/snippets/modules/chains/popular/chat_vector_db_legacy.mdx @@ -0,0 +1,79 @@ +import CodeBlock from "@theme/CodeBlock"; +import ConvoRetrievalQAExample from "@examples/chains/conversational_qa_legacy.ts"; + +{ConvoRetrievalQAExample} + +In the above code snippet, the fromLLM method of the `ConversationalRetrievalQAChain` class has the following signature: + +```typescript +static fromLLM( + llm: BaseLanguageModel, + retriever: BaseRetriever, + options?: { + questionGeneratorChainOptions?: { + llm?: BaseLanguageModel; + template?: string; + }; + qaChainOptions?: QAChainParams; + returnSourceDocuments?: boolean; + } +): ConversationalRetrievalQAChain +``` + +Here's an explanation of each of the attributes of the options object: + +- `questionGeneratorChainOptions`: An object that allows you to pass a custom template and LLM to the underlying question generation chain. + - If the template is provided, the `ConversationalRetrievalQAChain` will use this template to generate a question from the conversation context instead of using the question provided in the question parameter. + - Passing in a separate LLM (`llm`) here allows you to use a cheaper/faster model to create the condensed question while using a more powerful model for the final response, and can reduce unnecessary latency. +- `qaChainOptions`: Options that allow you to customize the specific QA chain used in the final step. The default is the [`StuffDocumentsChain`](/docs/modules/chains/document/stuff), but you can customize which chain is used by passing in a `type` parameter. 
+ **Passing specific options here is completely optional**, but can be useful if you want to customize the way the response is presented to the end user, or if you have too many documents for the default `StuffDocumentsChain`. + You can see [the API reference of the usable fields here](/docs/api/chains/types/QAChainParams). In case you want to make chat_history available to the final answering `qaChain`, which ultimately answers the user question, you HAVE to pass a custom qaTemplate with chat_history as input, as it is not present in the default Template, which only gets passed `context` documents and generated `question`. +- `returnSourceDocuments`: A boolean value that indicates whether the `ConversationalRetrievalQAChain` should return the source documents that were used to retrieve the answer. If set to true, the documents will be included in the result returned by the call() method. This can be useful if you want to allow the user to see the sources used to generate the answer. If not set, the default value will be false. + - If you are using this option and passing in a memory instance, set `inputKey` and `outputKey` on the memory instance to the same values as the chain input and final conversational chain output. These default to `"question"` and `"text"` respectively, and specify the values that the memory should store. + +## Built-in Memory + +Here's a customization example using a faster LLM to generate questions and a slower, more comprehensive LLM for the final answer. It uses a built-in memory object and returns the referenced source documents. +Because we have `returnSourceDocuments` set and are thus returning multiple values from the chain, we must set `inputKey` and `outputKey` on the memory instance +to let it know which values to store. 
+ +import ConvoQABuiltInExample from "@examples/chains/conversational_qa_built_in_memory_legacy.ts"; + +{ConvoQABuiltInExample} + +## Streaming + +You can also use the above concept of using two different LLMs to stream only the final response from the chain, and not output from the intermediate standalone question generation step. Here's an example: + +import ConvoQAStreamingExample from "@examples/chains/conversational_qa_streaming_legacy.ts"; + +{ConvoQAStreamingExample} + +## Externally-Managed Memory + +For this chain, if you'd like to format the chat history in a custom way (or pass in chat messages directly for convenience), you can also pass the chat history in explicitly by omitting the `memory` option and supplying +a `chat_history` string or array of [HumanMessages](/docs/api/schema/classes/HumanMessage) and [AIMessages](/docs/api/schema/classes/AIMessage) directly into the `chain.call` method: + +import ConvoQAExternalMemoryExample from "@examples/chains/conversational_qa_external_memory_legacy.ts"; + +{ConvoQAExternalMemoryExample} + +## Prompt Customization + +If you want to further change the chain's behavior, you can change the prompts for both the underlying question generation chain and the QA chain. + +One case where you might want to do this is to improve the chain's ability to answer meta questions about the chat history. +By default, the only input to the QA chain is the standalone question generated from the question generation chain. +This poses a challenge when asking meta questions about information in previous interactions from the chat history. + +For example, if you introduce a friend Bob and mention his age as 28, the chain is unable to provide his age upon asking a question like "How old is Bob?". +This limitation occurs because the bot searches for Bob in the vector store, rather than considering the message history. 
+ +You can pass an alternative prompt for the question generation chain that also returns parts of the chat history relevant to the answer, +allowing the QA chain to answer meta questions with the additional context: + +import ConvoRetrievalQAWithCustomPrompt from "@examples/chains/conversation_qa_custom_prompt_legacy.ts"; + +{ConvoRetrievalQAWithCustomPrompt} + +Keep in mind that adding more context to the prompt in this way may distract the LLM from other relevant retrieved information. \ No newline at end of file diff --git a/examples/src/chains/conversation_qa_custom_prompt.ts b/examples/src/chains/conversation_qa_custom_prompt_legacy.ts similarity index 100% rename from examples/src/chains/conversation_qa_custom_prompt.ts rename to examples/src/chains/conversation_qa_custom_prompt_legacy.ts diff --git a/examples/src/chains/conversational_qa.ts b/examples/src/chains/conversational_qa.ts index d1cb28f81ddc..d282753a8331 100644 --- a/examples/src/chains/conversational_qa.ts +++ b/examples/src/chains/conversational_qa.ts @@ -1,38 +1,97 @@ import { ChatOpenAI } from "langchain/chat_models/openai"; -import { ConversationalRetrievalQAChain } from "langchain/chains"; import { HNSWLib } from "langchain/vectorstores/hnswlib"; import { OpenAIEmbeddings } from "langchain/embeddings/openai"; import { RecursiveCharacterTextSplitter } from "langchain/text_splitter"; -import { BufferMemory } from "langchain/memory"; import * as fs from "fs"; +import { PromptTemplate } from "langchain/prompts"; +import { RunnableSequence } from "langchain/schema/runnable"; +import { Document } from "langchain/document"; +import { StringOutputParser } from "langchain/schema/output_parser"; -export const run = async () => { - /* Initialize the LLM to use to answer the question */ - const model = new ChatOpenAI({}); - /* Load in the file we want to do question answering over */ - const text = fs.readFileSync("state_of_the_union.txt", "utf8"); - /* Split the text into chunks */ - const 
textSplitter = new RecursiveCharacterTextSplitter({ chunkSize: 1000 }); - const docs = await textSplitter.createDocuments([text]); - /* Create the vectorstore */ - const vectorStore = await HNSWLib.fromDocuments(docs, new OpenAIEmbeddings()); - /* Create the chain */ - const chain = ConversationalRetrievalQAChain.fromLLM( - model, - vectorStore.asRetriever(), - { - memory: new BufferMemory({ - memoryKey: "chat_history", // Must be set to "chat_history" - }), - } - ); - /* Ask it a question */ - const question = "What did the president say about Justice Breyer?"; - const res = await chain.call({ question }); - console.log(res); - /* Ask it a follow up question */ - const followUpRes = await chain.call({ - question: "Was that nice?", - }); - console.log(followUpRes); +/* Initialize the LLM to use to answer the question */ +const model = new ChatOpenAI({}); +/* Load in the file we want to do question answering over */ +const text = fs.readFileSync("state_of_the_union.txt", "utf8"); +/* Split the text into chunks */ +const textSplitter = new RecursiveCharacterTextSplitter({ chunkSize: 1000 }); +const docs = await textSplitter.createDocuments([text]); +/* Create the vectorstore */ +const vectorStore = await HNSWLib.fromDocuments(docs, new OpenAIEmbeddings()); +const retriever = vectorStore.asRetriever(); + +const serializeDocs = (docs: Array) => + docs.map((doc) => doc.pageContent).join("\n\n"); + +const formatChatHistory = ( + human: string, + ai: string, + previousChatHistory?: string +) => { + const newInteraction = `Human: ${human}\nAI: ${ai}`; + if (!previousChatHistory) { + return newInteraction; + } + return `${previousChatHistory}\n\n${newInteraction}`; }; + +/** + * Create a prompt template for generating an answer based on context and + * a question. + * + * Chat history will be an empty string if it's the first question. 
+ * + * inputVariables: ["chatHistory", "context", "question"] + */ +const questionPrompt = PromptTemplate.fromTemplate( + `Use the following pieces of context to answer the question at the end. If you don't know the answer, just say that you don't know, don't try to make up an answer. + ---------------- + CONTEXT: {context} + ---------------- + CHAT HISTORY: {chatHistory} + ---------------- + QUESTION: {question} + ---------------- + Helpful Answer:` +); + +const chain = RunnableSequence.from([ + { + question: (input: { question: string; chatHistory?: string }) => + input.question, + chatHistory: (input: { question: string; chatHistory?: string }) => + input.chatHistory ?? "", + context: async (input: { question: string; chatHistory?: string }) => { + const relevantDocs = await retriever.getRelevantDocuments(input.question); + const serialized = serializeDocs(relevantDocs); + return serialized; + }, + }, + questionPrompt, + model, + new StringOutputParser(), +]); + +const questionOne = "What did the president say about Justice Breyer?"; + +const resultOne = await chain.invoke({ + question: questionOne, +}); + +console.log({ resultOne }); +/** + * { + * resultOne: 'The president thanked Justice Breyer for his service and described him as an Army veteran, Constitutional scholar, and retiring Justice of the United States Supreme Court.' + * } + */ + +const resultTwo = await chain.invoke({ + chatHistory: formatChatHistory(resultOne, questionOne), + question: "Was it nice?", +}); + +console.log({ resultTwo }); +/** + * { + * resultTwo: "Yes, the president's description of Justice Breyer was positive." 
+ * } + */ diff --git a/examples/src/chains/conversational_qa_built_in_memory.ts b/examples/src/chains/conversational_qa_built_in_memory.ts index 3c1b2a1be51a..b174b4139b5a 100644 --- a/examples/src/chains/conversational_qa_built_in_memory.ts +++ b/examples/src/chains/conversational_qa_built_in_memory.ts @@ -1,44 +1,172 @@ +import { Document } from "langchain/document"; import { ChatOpenAI } from "langchain/chat_models/openai"; -import { ConversationalRetrievalQAChain } from "langchain/chains"; +import { LLMChain } from "langchain/chains"; import { HNSWLib } from "langchain/vectorstores/hnswlib"; import { OpenAIEmbeddings } from "langchain/embeddings/openai"; import { RecursiveCharacterTextSplitter } from "langchain/text_splitter"; import { BufferMemory } from "langchain/memory"; - import * as fs from "fs"; +import { PromptTemplate } from "langchain/prompts"; +import { RunnableSequence } from "langchain/schema/runnable"; +import { BaseMessage } from "langchain/schema"; -export const run = async () => { - const text = fs.readFileSync("state_of_the_union.txt", "utf8"); - const textSplitter = new RecursiveCharacterTextSplitter({ chunkSize: 1000 }); - const docs = await textSplitter.createDocuments([text]); - const vectorStore = await HNSWLib.fromDocuments(docs, new OpenAIEmbeddings()); - const fasterModel = new ChatOpenAI({ - modelName: "gpt-3.5-turbo", - }); - const slowerModel = new ChatOpenAI({ - modelName: "gpt-4", +const text = fs.readFileSync("state_of_the_union.txt", "utf8"); + +const textSplitter = new RecursiveCharacterTextSplitter({ chunkSize: 1000 }); +const docs = await textSplitter.createDocuments([text]); + +const vectorStore = await HNSWLib.fromDocuments(docs, new OpenAIEmbeddings()); +const retriever = vectorStore.asRetriever(); + +const memory = new BufferMemory({ + memoryKey: "chatHistory", + inputKey: "question", // The key for the input to the chain + outputKey: "text", // The key for the final conversational output of the chain + returnMessages: 
true, // If using with a chat model (e.g. gpt-3.5 or gpt-4) +}); + +const serializeDocs = (docs: Array): string => + docs.map((doc) => doc.pageContent).join("\n"); + +const serializeChatHistory = (chatHistory: Array): string => + chatHistory + .map((chatMessage) => { + if (chatMessage._getType() === "human") { + return `Human: ${chatMessage.content}`; + } else if (chatMessage._getType() === "ai") { + return `Assistant: ${chatMessage.content}`; + } else { + return `${chatMessage.content}`; + } + }) + .join("\n"); + +/** + * Create two prompt templates, one for answering questions, and one for + * generating questions. + */ +const questionPrompt = PromptTemplate.fromTemplate( + `Use the following pieces of context to answer the question at the end. If you don't know the answer, just say that you don't know, don't try to make up an answer. +---------- +CONTEXT: {context} +---------- +CHAT HISTORY: {chatHistory} +---------- +QUESTION: {question} +---------- +Helpful Answer:` +); +const questionGeneratorTemplate = PromptTemplate.fromTemplate( + `Given the following conversation and a follow up question, rephrase the follow up question to be a standalone question. 
+---------- +CHAT HISTORY: {chatHistory} +---------- +FOLLOWUP QUESTION: {question} +---------- +Standalone question:` +); + +// Initialize fast and slow LLMs, along with chains for each +const fasterModel = new ChatOpenAI({ + modelName: "gpt-3.5-turbo", +}); +const fasterChain = new LLMChain({ + llm: fasterModel, + prompt: questionGeneratorTemplate, +}); + +const slowerModel = new ChatOpenAI({ + modelName: "gpt-4", +}); +const slowerChain = new LLMChain({ + llm: slowerModel, + prompt: questionPrompt, +}); + +const performQuestionAnswering = async (input: { + question: string; + chatHistory: Array | null; + context: Array; +}): Promise<{ result: string; sourceDocuments: Array }> => { + let newQuestion = input.question; + // Serialize context and chat history into strings + const serializedDocs = serializeDocs(input.context); + const chatHistoryString = input.chatHistory + ? serializeChatHistory(input.chatHistory) + : null; + + if (chatHistoryString) { + // Call the faster chain to generate a new question + const { text } = await fasterChain.invoke({ + chatHistory: chatHistoryString, + context: serializedDocs, + question: input.question, + }); + + newQuestion = text; + } + + const response = await slowerChain.invoke({ + chatHistory: chatHistoryString ?? "", + context: serializedDocs, + question: newQuestion, }); - const chain = ConversationalRetrievalQAChain.fromLLM( - slowerModel, - vectorStore.asRetriever(), + + // Save the chat history to memory + await memory.saveContext( { - returnSourceDocuments: true, - memory: new BufferMemory({ - memoryKey: "chat_history", - inputKey: "question", // The key for the input to the chain - outputKey: "text", // The key for the final conversational output of the chain - returnMessages: true, // If using with a chat model (e.g. 
gpt-3.5 or gpt-4) - }), - questionGeneratorChainOptions: { - llm: fasterModel, - }, + question: input.question, + }, + { + text: response.text, } ); - /* Ask it a question */ - const question = "What did the president say about Justice Breyer?"; - const res = await chain.call({ question }); - console.log(res); - const followUpRes = await chain.call({ question: "Was that nice?" }); - console.log(followUpRes); + return { + result: response.text, + sourceDocuments: input.context, + }; }; + +const chain = RunnableSequence.from([ + { + // Pipe the question through unchanged + question: (input: { question: string }) => input.question, + // Fetch the chat history, and return the history or null if not present + chatHistory: async () => { + const savedMemory = await memory.loadMemoryVariables({}); + const hasHistory = savedMemory.chatHistory.length > 0; + return hasHistory ? savedMemory.chatHistory : null; + }, + // Fetch relevant context based on the question + context: async (input: { question: string }) => + retriever.getRelevantDocuments(input.question), + }, + performQuestionAnswering, +]); + +const resultOne = await chain.invoke({ + question: "What did the president say about Justice Breyer?", +}); +console.log({ resultOne }); +/** + * { + * resultOne: { + * result: "The president thanked Justice Breyer for his service and described him as an Army veteran, Constitutional scholar, and retiring Justice of the United States Supreme Court.", + * sourceDocuments: [...] + * } + * } + */ + +const resultTwo = await chain.invoke({ + question: "Was he nice?", +}); +console.log({ resultTwo }); +/** + * { + * resultTwo: { + * result: "Yes, the president's description of Justice Breyer was positive." + * sourceDocuments: [...] 
+ * } + * } + */ diff --git a/examples/src/chains/conversational_qa_built_in_memory_legacy.ts b/examples/src/chains/conversational_qa_built_in_memory_legacy.ts new file mode 100644 index 000000000000..3c1b2a1be51a --- /dev/null +++ b/examples/src/chains/conversational_qa_built_in_memory_legacy.ts @@ -0,0 +1,44 @@ +import { ChatOpenAI } from "langchain/chat_models/openai"; +import { ConversationalRetrievalQAChain } from "langchain/chains"; +import { HNSWLib } from "langchain/vectorstores/hnswlib"; +import { OpenAIEmbeddings } from "langchain/embeddings/openai"; +import { RecursiveCharacterTextSplitter } from "langchain/text_splitter"; +import { BufferMemory } from "langchain/memory"; + +import * as fs from "fs"; + +export const run = async () => { + const text = fs.readFileSync("state_of_the_union.txt", "utf8"); + const textSplitter = new RecursiveCharacterTextSplitter({ chunkSize: 1000 }); + const docs = await textSplitter.createDocuments([text]); + const vectorStore = await HNSWLib.fromDocuments(docs, new OpenAIEmbeddings()); + const fasterModel = new ChatOpenAI({ + modelName: "gpt-3.5-turbo", + }); + const slowerModel = new ChatOpenAI({ + modelName: "gpt-4", + }); + const chain = ConversationalRetrievalQAChain.fromLLM( + slowerModel, + vectorStore.asRetriever(), + { + returnSourceDocuments: true, + memory: new BufferMemory({ + memoryKey: "chat_history", + inputKey: "question", // The key for the input to the chain + outputKey: "text", // The key for the final conversational output of the chain + returnMessages: true, // If using with a chat model (e.g. gpt-3.5 or gpt-4) + }), + questionGeneratorChainOptions: { + llm: fasterModel, + }, + } + ); + /* Ask it a question */ + const question = "What did the president say about Justice Breyer?"; + const res = await chain.call({ question }); + console.log(res); + + const followUpRes = await chain.call({ question: "Was that nice?" 
}); + console.log(followUpRes); +}; diff --git a/examples/src/chains/conversational_qa_external_memory.ts b/examples/src/chains/conversational_qa_external_memory_legacy.ts similarity index 100% rename from examples/src/chains/conversational_qa_external_memory.ts rename to examples/src/chains/conversational_qa_external_memory_legacy.ts diff --git a/examples/src/chains/conversational_qa_legacy.ts b/examples/src/chains/conversational_qa_legacy.ts new file mode 100644 index 000000000000..d1cb28f81ddc --- /dev/null +++ b/examples/src/chains/conversational_qa_legacy.ts @@ -0,0 +1,38 @@ +import { ChatOpenAI } from "langchain/chat_models/openai"; +import { ConversationalRetrievalQAChain } from "langchain/chains"; +import { HNSWLib } from "langchain/vectorstores/hnswlib"; +import { OpenAIEmbeddings } from "langchain/embeddings/openai"; +import { RecursiveCharacterTextSplitter } from "langchain/text_splitter"; +import { BufferMemory } from "langchain/memory"; +import * as fs from "fs"; + +export const run = async () => { + /* Initialize the LLM to use to answer the question */ + const model = new ChatOpenAI({}); + /* Load in the file we want to do question answering over */ + const text = fs.readFileSync("state_of_the_union.txt", "utf8"); + /* Split the text into chunks */ + const textSplitter = new RecursiveCharacterTextSplitter({ chunkSize: 1000 }); + const docs = await textSplitter.createDocuments([text]); + /* Create the vectorstore */ + const vectorStore = await HNSWLib.fromDocuments(docs, new OpenAIEmbeddings()); + /* Create the chain */ + const chain = ConversationalRetrievalQAChain.fromLLM( + model, + vectorStore.asRetriever(), + { + memory: new BufferMemory({ + memoryKey: "chat_history", // Must be set to "chat_history" + }), + } + ); + /* Ask it a question */ + const question = "What did the president say about Justice Breyer?"; + const res = await chain.call({ question }); + console.log(res); + /* Ask it a follow up question */ + const followUpRes = await 
chain.call({ + question: "Was that nice?", + }); + console.log(followUpRes); +}; diff --git a/examples/src/chains/conversational_qa_streaming.ts b/examples/src/chains/conversational_qa_streaming.ts index a991cc3f76d6..125f30ba4bce 100644 --- a/examples/src/chains/conversational_qa_streaming.ts +++ b/examples/src/chains/conversational_qa_streaming.ts @@ -1,52 +1,97 @@ +import { Document } from "langchain/document"; import { ChatOpenAI } from "langchain/chat_models/openai"; -import { ConversationalRetrievalQAChain } from "langchain/chains"; import { HNSWLib } from "langchain/vectorstores/hnswlib"; import { OpenAIEmbeddings } from "langchain/embeddings/openai"; import { RecursiveCharacterTextSplitter } from "langchain/text_splitter"; -import { BufferMemory } from "langchain/memory"; - import * as fs from "fs"; +import { PromptTemplate } from "langchain/prompts"; +import { StringOutputParser } from "langchain/schema/output_parser"; +import { RunnableSequence } from "langchain/schema/runnable"; + +/* Initialize the LLM & set streaming to true */ +const model = new ChatOpenAI({ + streaming: true, +}); +/* Load in the file we want to do question answering over */ +const text = fs.readFileSync("state_of_the_union.txt", "utf8"); +/* Split the text into chunks */ +const textSplitter = new RecursiveCharacterTextSplitter({ chunkSize: 1000 }); +const docs = await textSplitter.createDocuments([text]); +/* Create the vectorstore */ +const vectorStore = await HNSWLib.fromDocuments(docs, new OpenAIEmbeddings()); +const retriever = vectorStore.asRetriever(); + +const serializeDocs = (docs: Array<Document>) => + docs.map((doc) => doc.pageContent).join("\n\n"); + +/** + * Create a prompt template for generating an answer based on context and + * a question. + * + * Chat history will be an empty string if it's the first question.
+ * + * inputVariables: ["chatHistory", "context", "question"] + */ +const questionPrompt = PromptTemplate.fromTemplate( + `Use the following pieces of context to answer the question at the end. If you don't know the answer, just say that you don't know, don't try to make up an answer. +---------- +CONTEXT: {context} +---------- +CHAT HISTORY: {chatHistory} +---------- +QUESTION: {question} +---------- +Helpful Answer:` +); + +const chain = RunnableSequence.from([ + { + question: (input: { question: string; chatHistory?: string }) => + input.question, + chatHistory: (input: { question: string; chatHistory?: string }) => + input.chatHistory ?? "", + context: async (input: { question: string; chatHistory?: string }) => { + const relevantDocs = await retriever.getRelevantDocuments(input.question); + const serialized = serializeDocs(relevantDocs); + return serialized; + }, + }, + questionPrompt, + model, + new StringOutputParser(), +]); + +const stream = await chain.stream({ + question: "What did the president say about Justice Breyer?", +}); -export const run = async () => { - const text = fs.readFileSync("state_of_the_union.txt", "utf8"); - const textSplitter = new RecursiveCharacterTextSplitter({ chunkSize: 1000 }); - const docs = await textSplitter.createDocuments([text]); - const vectorStore = await HNSWLib.fromDocuments(docs, new OpenAIEmbeddings()); - let streamedResponse = ""; - const streamingModel = new ChatOpenAI({ - streaming: true, - callbacks: [ - { - handleLLMNewToken(token) { - streamedResponse += token; - }, - }, - ], - }); - const nonStreamingModel = new ChatOpenAI({}); - const chain = ConversationalRetrievalQAChain.fromLLM( - streamingModel, - vectorStore.asRetriever(), - { - returnSourceDocuments: true, - memory: new BufferMemory({ - memoryKey: "chat_history", - inputKey: "question", // The key for the input to the chain - outputKey: "text", // The key for the final conversational output of the chain - returnMessages: true, // If using with a chat 
model - }), - questionGeneratorChainOptions: { - llm: nonStreamingModel, - }, - } - ); - /* Ask it a question */ - const question = "What did the president say about Justice Breyer?"; - const res = await chain.call({ question }); - console.log({ streamedResponse }); - /* - { - streamedResponse: 'President Biden thanked Justice Breyer for his service, and honored him as an Army veteran, Constitutional scholar and retiring Justice of the United States Supreme Court.' - } - */ -}; +let streamedResult = ""; +for await (const chunk of stream) { + streamedResult += chunk; + console.log(streamedResult); +} +/** + * The + * The president + * The president honored + * The president honored Justice + * The president honored Justice Stephen + * The president honored Justice Stephen B + * The president honored Justice Stephen Brey + * The president honored Justice Stephen Breyer + * The president honored Justice Stephen Breyer, + * The president honored Justice Stephen Breyer, a + * The president honored Justice Stephen Breyer, a retiring + * The president honored Justice Stephen Breyer, a retiring Justice + * The president honored Justice Stephen Breyer, a retiring Justice of + * The president honored Justice Stephen Breyer, a retiring Justice of the + * The president honored Justice Stephen Breyer, a retiring Justice of the United + * The president honored Justice Stephen Breyer, a retiring Justice of the United States + * The president honored Justice Stephen Breyer, a retiring Justice of the United States Supreme + * The president honored Justice Stephen Breyer, a retiring Justice of the United States Supreme Court + * The president honored Justice Stephen Breyer, a retiring Justice of the United States Supreme Court, + * The president honored Justice Stephen Breyer, a retiring Justice of the United States Supreme Court, for + * The president honored Justice Stephen Breyer, a retiring Justice of the United States Supreme Court, for his + * The president honored Justice 
Stephen Breyer, a retiring Justice of the United States Supreme Court, for his service + * The president honored Justice Stephen Breyer, a retiring Justice of the United States Supreme Court, for his service. + */ diff --git a/examples/src/chains/conversational_qa_streaming_legacy.ts b/examples/src/chains/conversational_qa_streaming_legacy.ts new file mode 100644 index 000000000000..a991cc3f76d6 --- /dev/null +++ b/examples/src/chains/conversational_qa_streaming_legacy.ts @@ -0,0 +1,52 @@ +import { ChatOpenAI } from "langchain/chat_models/openai"; +import { ConversationalRetrievalQAChain } from "langchain/chains"; +import { HNSWLib } from "langchain/vectorstores/hnswlib"; +import { OpenAIEmbeddings } from "langchain/embeddings/openai"; +import { RecursiveCharacterTextSplitter } from "langchain/text_splitter"; +import { BufferMemory } from "langchain/memory"; + +import * as fs from "fs"; + +export const run = async () => { + const text = fs.readFileSync("state_of_the_union.txt", "utf8"); + const textSplitter = new RecursiveCharacterTextSplitter({ chunkSize: 1000 }); + const docs = await textSplitter.createDocuments([text]); + const vectorStore = await HNSWLib.fromDocuments(docs, new OpenAIEmbeddings()); + let streamedResponse = ""; + const streamingModel = new ChatOpenAI({ + streaming: true, + callbacks: [ + { + handleLLMNewToken(token) { + streamedResponse += token; + }, + }, + ], + }); + const nonStreamingModel = new ChatOpenAI({}); + const chain = ConversationalRetrievalQAChain.fromLLM( + streamingModel, + vectorStore.asRetriever(), + { + returnSourceDocuments: true, + memory: new BufferMemory({ + memoryKey: "chat_history", + inputKey: "question", // The key for the input to the chain + outputKey: "text", // The key for the final conversational output of the chain + returnMessages: true, // If using with a chat model + }), + questionGeneratorChainOptions: { + llm: nonStreamingModel, + }, + } + ); + /* Ask it a question */ + const question = "What did the 
president say about Justice Breyer?"; + const res = await chain.call({ question }); + console.log({ streamedResponse }); + /* + { + streamedResponse: 'President Biden thanked Justice Breyer for his service, and honored him as an Army veteran, Constitutional scholar and retiring Justice of the United States Supreme Court.' + } + */ +};