From ebe08412ad1649c33501cc80edffbb908cc0157e Mon Sep 17 00:00:00 2001
From: DanielZzz <zhonghanjun@baidu.com>
Date: Wed, 20 Sep 2023 13:35:51 +0800
Subject: [PATCH] fix: chat_models Qianfan not compatiable with SystemMessage
 (#10642)

- **Description:** QianfanEndpoint bugs for SystemMessages. When the
`SystemMessage` is input as the messages to
`chat_models.QianfanEndpoint`. A `TypeError` will be raised.
  - **Issue:** #10643
  - **Dependencies:**
  - **Tag maintainer:** @baskaryan
  - **Twitter handle:** no
---
 .../chat/baidu_qianfan_endpoint.ipynb         | 112 +++++++++++++---
 .../llms/baidu_qianfan_endpoint.ipynb         | 122 +++++++++++++++---
 .../baidu_qianfan_endpoint.ipynb              |  71 +++++++---
 .../chat_models/baidu_qianfan_endpoint.py     |  51 ++++++--
 .../langchain/llms/baidu_qianfan_endpoint.py  |   9 +-
 5 files changed, 293 insertions(+), 72 deletions(-)

diff --git a/docs/extras/integrations/chat/baidu_qianfan_endpoint.ipynb b/docs/extras/integrations/chat/baidu_qianfan_endpoint.ipynb
index 69dd217db22a6..c77582e3bef34 100644
--- a/docs/extras/integrations/chat/baidu_qianfan_endpoint.ipynb
+++ b/docs/extras/integrations/chat/baidu_qianfan_endpoint.ipynb
@@ -46,21 +46,26 @@
   },
   {
    "cell_type": "code",
-   "execution_count": null,
+   "execution_count": 5,
    "metadata": {},
-   "outputs": [],
+   "outputs": [
+    {
+     "name": "stderr",
+     "output_type": "stream",
+     "text": [
+      "[INFO] [09-15 20:00:29] logging.py:55 [t:139698882193216]: requesting llm api endpoint: /chat/eb-instant\n"
+     ]
+    }
+   ],
    "source": [
     "\"\"\"For basic init and call\"\"\"\n",
-    "from langchain.chat_models.baidu_qianfan_endpoint import QianfanChatEndpoint \n",
+    "from langchain.chat_models import QianfanChatEndpoint \n",
     "from langchain.chat_models.base import HumanMessage\n",
     "import os\n",
-    "os.environ[\"QIAFAN_AK\"] = \"xxx\"\n",
-    "os.environ[\"QIAFAN_AK\"] = \"xxx\"\n",
-    "\n",
+    "os.environ[\"QIANFAN_AK\"] = \"your_ak\"\n",
+    "os.environ[\"QIANFAN_SK\"] = \"your_sk\"\n",
     "\n",
     "chat = QianfanChatEndpoint(\n",
-    "                            qianfan_ak=\"xxx\",\n",
-    "                            qianfan_sk=\"xxx\",\n",
     "                            streaming=True, \n",
     "                            )\n",
     "res = chat([HumanMessage(content=\"write a funny joke\")])\n"
@@ -68,21 +73,55 @@
   },
   {
    "cell_type": "code",
-   "execution_count": null,
+   "execution_count": 6,
    "metadata": {},
-   "outputs": [],
+   "outputs": [
+    {
+     "name": "stderr",
+     "output_type": "stream",
+     "text": [
+      "[INFO] [09-15 20:00:36] logging.py:55 [t:139698882193216]: requesting llm api endpoint: /chat/eb-instant\n",
+      "[INFO] [09-15 20:00:37] logging.py:55 [t:139698882193216]: async requesting llm api endpoint: /chat/eb-instant\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "chat resp: content='您好，您似乎输入' additional_kwargs={} example=False\n",
+      "chat resp: content='了一个话题标签，请问需要我帮您找到什么资料或者帮助您解答什么问题吗？' additional_kwargs={} example=False\n",
+      "chat resp: content='' additional_kwargs={} example=False\n"
+     ]
+    },
+    {
+     "name": "stderr",
+     "output_type": "stream",
+     "text": [
+      "[INFO] [09-15 20:00:39] logging.py:55 [t:139698882193216]: async requesting llm api endpoint: /chat/eb-instant\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "generations=[[ChatGeneration(text=\"The sea is a vast expanse of water that covers much of the Earth's surface. It is a source of travel, trade, and entertainment, and is also a place of scientific exploration and marine conservation. The sea is an important part of our world, and we should cherish and protect it.\", generation_info={'finish_reason': 'finished'}, message=AIMessage(content=\"The sea is a vast expanse of water that covers much of the Earth's surface. It is a source of travel, trade, and entertainment, and is also a place of scientific exploration and marine conservation. The sea is an important part of our world, and we should cherish and protect it.\", additional_kwargs={}, example=False))]] llm_output={} run=[RunInfo(run_id=UUID('d48160a6-5960-4c1d-8a0e-90e6b51a209b'))]\n",
+      "astream content='The sea is a vast' additional_kwargs={} example=False\n",
+      "astream content=' expanse of water, a place of mystery and adventure. It is the source of many cultures and civilizations, and a center of trade and exploration. The sea is also a source of life and beauty, with its unique marine life and diverse' additional_kwargs={} example=False\n",
+      "astream content=' coral reefs. Whether you are swimming, diving, or just watching the sea, it is a place that captivates the imagination and transforms the spirit.' additional_kwargs={} example=False\n"
+     ]
+    }
+   ],
    "source": [
     "    \n",
-    "from langchain.chat_models.baidu_qianfan_endpoint import QianfanChatEndpoint\n",
+    "from langchain.chat_models import QianfanChatEndpoint\n",
     "from langchain.schema import HumanMessage\n",
-    "import asyncio\n",
     "\n",
     "chatLLM = QianfanChatEndpoint(\n",
     "    streaming=True,\n",
     ")\n",
     "res = chatLLM.stream([HumanMessage(content=\"hi\")], streaming=True)\n",
     "for r in res:\n",
-    "    print(\"chat resp1:\", r)\n",
+    "    print(\"chat resp:\", r)\n",
     "\n",
     "\n",
     "async def run_aio_generate():\n",
@@ -113,9 +152,24 @@
   },
   {
    "cell_type": "code",
-   "execution_count": null,
+   "execution_count": 7,
    "metadata": {},
-   "outputs": [],
+   "outputs": [
+    {
+     "name": "stderr",
+     "output_type": "stream",
+     "text": [
+      "[INFO] [09-15 20:00:50] logging.py:55 [t:139698882193216]: requesting llm api endpoint: /chat/bloomz_7b1\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "content='你好！很高兴见到你。' additional_kwargs={} example=False\n"
+     ]
+    }
+   ],
    "source": [
     "chatBloom = QianfanChatEndpoint(\n",
     "                            streaming=True, \n",
@@ -141,9 +195,27 @@
   },
   {
    "cell_type": "code",
-   "execution_count": null,
+   "execution_count": 8,
    "metadata": {},
-   "outputs": [],
+   "outputs": [
+    {
+     "name": "stderr",
+     "output_type": "stream",
+     "text": [
+      "[INFO] [09-15 20:00:57] logging.py:55 [t:139698882193216]: requesting llm api endpoint: /chat/eb-instant\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "content='您好，您似乎输入' additional_kwargs={} example=False\n",
+      "content='了一个文本字符串，但并没有给出具体的问题或场景。' additional_kwargs={} example=False\n",
+      "content='如果您能提供更多信息，我可以更好地回答您的问题。' additional_kwargs={} example=False\n",
+      "content='' additional_kwargs={} example=False\n"
+     ]
+    }
+   ],
    "source": [
     "res = chat.stream([HumanMessage(content=\"hi\")], **{'top_p': 0.4, 'temperature': 0.1, 'penalty_score': 1})\n",
     "\n",
@@ -154,7 +226,7 @@
  ],
  "metadata": {
   "kernelspec": {
-   "display_name": "Python 3",
+   "display_name": "base",
    "language": "python",
    "name": "python3"
   },
@@ -168,11 +240,11 @@
    "name": "python",
    "nbconvert_exporter": "python",
    "pygments_lexer": "ipython3",
-   "version": "3.8.2"
+   "version": "3.11.4"
   },
   "vscode": {
    "interpreter": {
-    "hash": "2d8226dd90b7dc6e8932aea372a8bf9fc71abac4be3cdd5a63a36c2a19e3700f"
+    "hash": "6fa70026b407ae751a5c9e6bd7f7d482379da8ad616f98512780b705c84ee157"
    }
   }
  },
diff --git a/docs/extras/integrations/llms/baidu_qianfan_endpoint.ipynb b/docs/extras/integrations/llms/baidu_qianfan_endpoint.ipynb
index 42f082ef24295..46a8deeebb72a 100644
--- a/docs/extras/integrations/llms/baidu_qianfan_endpoint.ipynb
+++ b/docs/extras/integrations/llms/baidu_qianfan_endpoint.ipynb
@@ -47,32 +47,88 @@
   },
   {
    "cell_type": "code",
-   "execution_count": null,
+   "execution_count": 2,
    "metadata": {},
-   "outputs": [],
+   "outputs": [
+    {
+     "name": "stderr",
+     "output_type": "stream",
+     "text": [
+      "[INFO] [09-15 20:23:22] logging.py:55 [t:140708023539520]: trying to refresh access_token\n",
+      "[INFO] [09-15 20:23:22] logging.py:55 [t:140708023539520]: sucessfully refresh access_token\n",
+      "[INFO] [09-15 20:23:22] logging.py:55 [t:140708023539520]: requesting llm api endpoint: /chat/eb-instant\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "0.0.280\n",
+      "作为一个人工智能语言模型，我无法提供此类信息。\n",
+      "这种类型的信息可能会违反法律法规，并对用户造成严重的心理和社交伤害。\n",
+      "建议遵守相关的法律法规和社会道德规范，并寻找其他有益和健康的娱乐方式。\n"
+     ]
+    }
+   ],
    "source": [
-    "\"\"\"For basic init and call\"\"\"\n",
-    "from langchain.llms.baidu_qianfan_endpoint import QianfanLLMEndpoint\n",
     "\n",
+    "\"\"\"For basic init and call\"\"\"\n",
+    "from langchain.llms import QianfanLLMEndpoint\n",
     "import os\n",
     "\n",
-    "os.environ[\"QIANFAN_AK\"] = \"xx\"\n",
-    "os.environ[\"QIANFAN_SK\"] = \"xx\"\n",
+    "os.environ[\"QIANFAN_AK\"] = \"your_ak\"\n",
+    "os.environ[\"QIANFAN_SK\"] = \"your_sk\"\n",
     "\n",
-    "llm = QianfanLLMEndpoint(streaming=True, ak=\"xx\", sk=\"xx\")\n",
-    "res = llm(\"hi\")\n"
+    "llm = QianfanLLMEndpoint(streaming=True)\n",
+    "res = llm(\"hi\")\n",
+    "print(res)\n"
    ]
   },
   {
    "cell_type": "code",
-   "execution_count": null,
+   "execution_count": 3,
    "metadata": {},
-   "outputs": [],
+   "outputs": [
+    {
+     "name": "stderr",
+     "output_type": "stream",
+     "text": [
+      "[INFO] [09-15 20:23:26] logging.py:55 [t:140708023539520]: requesting llm api endpoint: /chat/eb-instant\n",
+      "[INFO] [09-15 20:23:27] logging.py:55 [t:140708023539520]: async requesting llm api endpoint: /chat/eb-instant\n",
+      "[INFO] [09-15 20:23:29] logging.py:55 [t:140708023539520]: requesting llm api endpoint: /chat/eb-instant\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "generations=[[Generation(text='Rivers are an important part of the natural environment, providing drinking water, transportation, and other services for human beings. However, due to human activities such as pollution and dams, rivers are facing a series of problems such as water quality degradation and fishery resources decline. Therefore, we should strengthen environmental protection and management, and protect rivers and other natural resources.', generation_info=None)]] llm_output=None run=[RunInfo(run_id=UUID('ffa72a97-caba-48bb-bf30-f5eaa21c996a'))]\n"
+     ]
+    },
+    {
+     "name": "stderr",
+     "output_type": "stream",
+     "text": [
+      "[INFO] [09-15 20:23:30] logging.py:55 [t:140708023539520]: async requesting llm api endpoint: /chat/eb-instant\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "As an AI language model\n",
+      ", I cannot provide any inappropriate content. My goal is to provide useful and positive information to help people solve problems.\n",
+      "Mountains are the symbols\n",
+      " of majesty and power in nature, and also the lungs of the world. They not only provide oxygen for human beings, but also provide us with beautiful scenery and refreshing air. We can climb mountains to experience the charm of nature,\n",
+      " but also exercise our body and spirit. When we are not satisfied with the rote, we can go climbing, refresh our energy, and reset our focus. However, climbing mountains should be carried out in an organized and safe manner. If you don\n",
+      "'t know how to climb, you should learn first, or seek help from professionals. Enjoy the beautiful scenery of mountains, but also pay attention to safety.\n"
+     ]
+    }
+   ],
    "source": [
     "\n",
     "\"\"\"Test for llm generate \"\"\"\n",
     "res = llm.generate(prompts=[\"hillo?\"])\n",
-    "import asyncio\n",
     "\"\"\"Test for llm aio generate\"\"\"\n",
     "async def run_aio_generate():\n",
     "    resp = await llm.agenerate(prompts=[\"Write a 20-word article about rivers.\"])\n",
@@ -107,16 +163,23 @@
   },
   {
    "cell_type": "code",
-   "execution_count": null,
+   "execution_count": 4,
    "metadata": {},
-   "outputs": [],
+   "outputs": [
+    {
+     "name": "stderr",
+     "output_type": "stream",
+     "text": [
+      "[INFO] [09-15 20:23:36] logging.py:55 [t:140708023539520]: requesting llm api endpoint: /chat/eb-instant\n"
+     ]
+    }
+   ],
    "source": [
-    "llm = QianfanLLMEndpoint(qianfan_ak='xxx', \n",
-    "                            qianfan_sk='xxx', \n",
-    "                            streaming=True, \n",
-    "                            model=\"ERNIE-Bot-turbo\",\n",
-    "                            endpoint=\"eb-instant\",\n",
-    "                            )\n",
+    "llm = QianfanLLMEndpoint(\n",
+    "        streaming=True, \n",
+    "        model=\"ERNIE-Bot-turbo\",\n",
+    "        endpoint=\"eb-instant\",\n",
+    "        )\n",
     "res = llm(\"hi\")"
    ]
   },
@@ -136,9 +199,26 @@
   },
   {
    "cell_type": "code",
-   "execution_count": null,
+   "execution_count": 5,
    "metadata": {},
-   "outputs": [],
+   "outputs": [
+    {
+     "name": "stderr",
+     "output_type": "stream",
+     "text": [
+      "[INFO] [09-15 20:23:40] logging.py:55 [t:140708023539520]: requesting llm api endpoint: /chat/eb-instant\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "('generations', [[Generation(text='您好，您似乎输入了一个文本字符串，但并没有给出具体的问题或场景。如果您能提供更多信息，我可以更好地回答您的问题。', generation_info=None)]])\n",
+      "('llm_output', None)\n",
+      "('run', [RunInfo(run_id=UUID('9d0bfb14-cf15-44a9-bca1-b3e96b75befe'))])\n"
+     ]
+    }
+   ],
    "source": [
     "res = llm.generate(prompts=[\"hi\"], streaming=True, **{'top_p': 0.4, 'temperature': 0.1, 'penalty_score': 1})\n",
     "\n",
diff --git a/docs/extras/integrations/text_embedding/baidu_qianfan_endpoint.ipynb b/docs/extras/integrations/text_embedding/baidu_qianfan_endpoint.ipynb
index 21466d2b7655b..5272f235309ea 100644
--- a/docs/extras/integrations/text_embedding/baidu_qianfan_endpoint.ipynb
+++ b/docs/extras/integrations/text_embedding/baidu_qianfan_endpoint.ipynb
@@ -34,26 +34,47 @@
   },
   {
    "cell_type": "code",
-   "execution_count": null,
+   "execution_count": 1,
    "metadata": {},
-   "outputs": [],
+   "outputs": [
+    {
+     "name": "stderr",
+     "output_type": "stream",
+     "text": [
+      "[INFO] [09-15 20:01:35] logging.py:55 [t:140292313159488]: trying to refresh access_token\n",
+      "[INFO] [09-15 20:01:35] logging.py:55 [t:140292313159488]: sucessfully refresh access_token\n",
+      "[INFO] [09-15 20:01:35] logging.py:55 [t:140292313159488]: requesting llm api endpoint: /embeddings/embedding-v1\n",
+      "[INFO] [09-15 20:01:35] logging.py:55 [t:140292313159488]: async requesting llm api endpoint: /embeddings/embedding-v1\n",
+      "[INFO] [09-15 20:01:35] logging.py:55 [t:140292313159488]: async requesting llm api endpoint: /embeddings/embedding-v1\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "[-0.03313107788562775, 0.052325375378131866, 0.04951248690485954, 0.0077608139254152775, -0.05907672271132469, -0.010798933915793896, 0.03741293027997017, 0.013969100080430508]\n",
+      " [0.0427522286772728, -0.030367236584424973, -0.14847028255462646, 0.055074431002140045, -0.04177454113960266, -0.059512972831726074, -0.043774791061878204, 0.0028191760648041964]\n",
+      " [0.03803155943751335, -0.013231384567916393, 0.0032379645854234695, 0.015074018388986588, -0.006529552862048149, -0.13813287019729614, 0.03297128155827522, 0.044519297778606415]\n"
+     ]
+    }
+   ],
    "source": [
     "\"\"\"For basic init and call\"\"\"\n",
-    "from langchain.embeddings.baidu_qianfan_endpoint import QianfanEmbeddingsEndpoint \n",
+    "from langchain.embeddings import QianfanEmbeddingsEndpoint \n",
     "\n",
     "import os\n",
-    "os.environ[\"QIANFAN_AK\"] = \"xx\"\n",
-    "os.environ[\"QIANFAN_SK\"] = \"xx\"\n",
+    "os.environ[\"QIANFAN_AK\"] = \"your_ak\"\n",
+    "os.environ[\"QIANFAN_SK\"] = \"your_sk\"\n",
     "\n",
-    "embed = QianfanEmbeddingsEndpoint(qianfan_ak='xxx', \n",
-    "                            qianfan_sk='xxx')\n",
+    "embed = QianfanEmbeddingsEndpoint(\n",
+    "    # qianfan_ak='xxx', \n",
+    "    # qianfan_sk='xxx'\n",
+    ")\n",
     "res = embed.embed_documents([\"hi\", \"world\"])\n",
     "\n",
-    "import asyncio\n",
-    "\n",
     "async def aioEmbed():\n",
     "    res = await embed.aembed_query(\"qianfan\")\n",
-    "    print(res)\n",
+    "    print(res[:8])\n",
     "await aioEmbed()\n",
     "\n",
     "import asyncio\n",
@@ -81,16 +102,34 @@
   },
   {
    "cell_type": "code",
-   "execution_count": null,
+   "execution_count": 2,
    "metadata": {},
-   "outputs": [],
+   "outputs": [
+    {
+     "name": "stderr",
+     "output_type": "stream",
+     "text": [
+      "[INFO] [09-15 20:01:40] logging.py:55 [t:140292313159488]: requesting llm api endpoint: /embeddings/bge_large_zh\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "[-0.0001582596160005778, -0.025089964270591736, -0.03997539356350899, 0.013156415894627571, 0.000135212714667432, 0.012428865768015385, 0.016216561198234558, -0.04126659780740738]\n",
+      "[0.0019113451708108187, -0.008625439368188381, -0.0531032420694828, -0.0018436014652252197, -0.01818147301673889, 0.010310115292668343, -0.008867680095136166, -0.021067561581730843]\n"
+     ]
+    }
+   ],
    "source": [
-    "embed = QianfanEmbeddingsEndpoint(qianfan_ak='xxx', \n",
-    "                            qianfan_sk='xxx',\n",
+    "embed = QianfanEmbeddingsEndpoint(\n",
     "                            model=\"bge_large_zh\",\n",
-    "                            endpoint=\"bge_large_zh\")\n",
+    "                            endpoint=\"bge_large_zh\"\n",
+    "                            )\n",
     "\n",
-    "res = embed.embed_documents([\"hi\", \"world\"])"
+    "res = embed.embed_documents([\"hi\", \"world\"])\n",
+    "for r in res :\n",
+    "    print(r[:8])"
    ]
   }
  ],
diff --git a/libs/langchain/langchain/chat_models/baidu_qianfan_endpoint.py b/libs/langchain/langchain/chat_models/baidu_qianfan_endpoint.py
index a58a7f6a1c7e3..2bcd70c164117 100644
--- a/libs/langchain/langchain/chat_models/baidu_qianfan_endpoint.py
+++ b/libs/langchain/langchain/chat_models/baidu_qianfan_endpoint.py
@@ -26,6 +26,7 @@
     ChatMessage,
     FunctionMessage,
     HumanMessage,
+    SystemMessage,
 )
 from langchain.schema.output import ChatGenerationChunk
 from langchain.utils import get_from_dict_or_env
@@ -80,7 +81,7 @@ class QianfanChatEndpoint(BaseChatModel):
 
             from langchain.chat_models import QianfanChatEndpoint
             qianfan_chat = QianfanChatEndpoint(model="ERNIE-Bot",
-                endpoint="your_endpoint", ak="your_ak", sk="your_sk")
+                endpoint="your_endpoint", qianfan_ak="your_ak", qianfan_sk="your_sk")
     """
 
     model_kwargs: Dict[str, Any] = Field(default_factory=dict)
@@ -174,9 +175,35 @@ def _convert_prompt_msg_params(
         self,
         messages: List[BaseMessage],
         **kwargs: Any,
-    ) -> dict:
+    ) -> Dict[str, Any]:
+        """
+        Converts a list of messages into a dictionary containing the message content
+        and default parameters.
+
+        Args:
+            messages (List[BaseMessage]): The list of messages.
+            **kwargs (Any): Optional arguments to add additional parameters to the
+            resulting dictionary.
+
+        Returns:
+            Dict[str, Any]: A dictionary containing the message content and default
+            parameters.
+
+        """
+        messages_dict: Dict[str, Any] = {
+            "messages": [
+                convert_message_to_dict(m)
+                for m in messages
+                if not isinstance(m, SystemMessage)
+            ]
+        }
+        for i in [i for i, m in enumerate(messages) if isinstance(m, SystemMessage)]:
+            if "system" not in messages_dict:
+                messages_dict["system"] = ""
+            messages_dict["system"] += messages[i].content + "\n"
+
         return {
-            **{"messages": [convert_message_to_dict(m) for m in messages]},
+            **messages_dict,
             **self._default_params,
             **kwargs,
         }
@@ -206,7 +233,7 @@ def _generate(
             lc_msg = AIMessage(content=completion, additional_kwargs={})
             gen = ChatGeneration(
                 message=lc_msg,
-                generation_info=dict(finish_reason="finished"),
+                generation_info=dict(finish_reason="stop"),
             )
             return ChatResult(
                 generations=[gen],
@@ -217,7 +244,7 @@ def _generate(
         lc_msg = AIMessage(content=response_payload["result"], additional_kwargs={})
         gen = ChatGeneration(
             message=lc_msg,
-            generation_info=dict(finish_reason="finished"),
+            generation_info=dict(finish_reason="stop"),
         )
         token_usage = response_payload.get("usage", {})
         llm_output = {"token_usage": token_usage, "model_name": self.model}
@@ -232,12 +259,14 @@ async def _agenerate(
     ) -> ChatResult:
         if self.streaming:
             completion = ""
+            token_usage = {}
             async for chunk in self._astream(messages, stop, run_manager, **kwargs):
                 completion += chunk.text
+
             lc_msg = AIMessage(content=completion, additional_kwargs={})
             gen = ChatGeneration(
                 message=lc_msg,
-                generation_info=dict(finish_reason="finished"),
+                generation_info=dict(finish_reason="stop"),
             )
             return ChatResult(
                 generations=[gen],
@@ -249,7 +278,7 @@ async def _agenerate(
         generations = []
         gen = ChatGeneration(
             message=lc_msg,
-            generation_info=dict(finish_reason="finished"),
+            generation_info=dict(finish_reason="stop"),
         )
         generations.append(gen)
         token_usage = response_payload.get("usage", {})
@@ -269,11 +298,10 @@ def _stream(
                 chunk = ChatGenerationChunk(
                     text=res["result"],
                     message=_convert_resp_to_message_chunk(res),
-                    generation_info={"finish_reason": "finished"},
                 )
                 yield chunk
                 if run_manager:
-                    run_manager.on_llm_new_token(chunk.text)
+                    run_manager.on_llm_new_token(chunk.text, chunk=chunk)
 
     async def _astream(
         self,
@@ -286,8 +314,9 @@ async def _astream(
         async for res in await self.client.ado(**params):
             if res:
                 chunk = ChatGenerationChunk(
-                    text=res["result"], message=_convert_resp_to_message_chunk(res)
+                    text=res["result"],
+                    message=_convert_resp_to_message_chunk(res),
                 )
                 yield chunk
                 if run_manager:
-                    await run_manager.on_llm_new_token(chunk.text)
+                    await run_manager.on_llm_new_token(chunk.text, chunk=chunk)
diff --git a/libs/langchain/langchain/llms/baidu_qianfan_endpoint.py b/libs/langchain/langchain/llms/baidu_qianfan_endpoint.py
index eaf0067485a14..8b548a7cd90d4 100644
--- a/libs/langchain/langchain/llms/baidu_qianfan_endpoint.py
+++ b/libs/langchain/langchain/llms/baidu_qianfan_endpoint.py
@@ -37,7 +37,7 @@ class QianfanLLMEndpoint(LLM):
 
             from langchain.llms import QianfanLLMEndpoint
             qianfan_model = QianfanLLMEndpoint(model="ERNIE-Bot",
-                endpoint="your_endpoint", ak="your_ak", sk="your_sk")
+                endpoint="your_endpoint", qianfan_ak="your_ak", qianfan_sk="your_sk")
     """
 
     model_kwargs: Dict[str, Any] = Field(default_factory=dict)
@@ -132,6 +132,8 @@ def _convert_prompt_msg_params(
         prompt: str,
         **kwargs: Any,
     ) -> dict:
+        if "streaming" in kwargs:
+            kwargs["stream"] = kwargs.pop("streaming")
         return {
             **{"prompt": prompt, "model": self.model},
             **self._default_params,
@@ -191,8 +193,7 @@ def _stream(
         run_manager: Optional[CallbackManagerForLLMRun] = None,
         **kwargs: Any,
     ) -> Iterator[GenerationChunk]:
-        params = self._convert_prompt_msg_params(prompt, **kwargs)
-
+        params = self._convert_prompt_msg_params(prompt, **{**kwargs, "stream": True})
         for res in self.client.do(**params):
             if res:
                 chunk = GenerationChunk(text=res["result"])
@@ -207,7 +208,7 @@ async def _astream(
         run_manager: Optional[AsyncCallbackManagerForLLMRun] = None,
         **kwargs: Any,
     ) -> AsyncIterator[GenerationChunk]:
-        params = self._convert_prompt_msg_params(prompt, **kwargs)
+        params = self._convert_prompt_msg_params(prompt, **{**kwargs, "stream": True})
         async for res in await self.client.ado(**params):
             if res:
                 chunk = GenerationChunk(text=res["result"])