Merge pull request #63 from InternLM/mistral
[add] mistral-7b model
fly2tomato authored Mar 1, 2024
2 parents e30b5de + 3a098aa commit 84bfb23
Showing 17 changed files with 189 additions and 13 deletions.
1 change: 1 addition & 0 deletions README.md
@@ -16,6 +16,7 @@ English | [简体中文](docs/README_zh-CN.md)


## Latest Progress 🎉
- \[February 2024\] Add mistral-7b model
- \[February 2024\] Add gemini-pro model
- \[January 2024\] Refactor config-template.yaml to control the backend and frontend settings at the same time; [click here](https://github.com/InternLM/OpenAOE/blob/main/docs/tech-report/config-template.md) for more about `config-template.yaml`
- \[January 2024\] Add internlm2-chat-7b model
1 change: 1 addition & 0 deletions docs/README_zh-CN.md
@@ -13,6 +13,7 @@


## Latest Progress 🎉
- \[2024/02\] Add mistral-7b model
- \[2024/02\] Add gemini-pro model
- \[2024/01\] Refactor config-template.yaml to configure both frontend and backend settings
- \[2024/01\] Add internlm2-chat-7b model
3 changes: 3 additions & 0 deletions docs/todo/TODO.md
@@ -4,6 +4,9 @@
- [x] add internlm2-chat-7b model as default
- [x] add Gemini model as default
- [x] refactor the config.yaml to make the model settings look more logical
- [x] add Mistral-7b model
- [x] add Gemma model
- [ ] integrate ollama as one of the inference engines
- [ ] dynamically add new models by editing external Python files and the config.yaml
- [ ] build the frontend project when OpenAOE starts up
- [ ] support image interaction
18 changes: 18 additions & 0 deletions openaoe/backend/api/route_mistral.py
@@ -0,0 +1,18 @@
from fastapi import APIRouter, Request, Response

from openaoe.backend.service.mistral import Mistral
from openaoe.backend.model.openaoe import AoeChatBody

router = APIRouter()


@router.post("/v1/mistral/chat", tags=["Mistral"])
async def mistral_chat(body: AoeChatBody, request: Request, response: Response):
    """
    Chat API for the Mistral-7B model.
    :param body: request body
    :param request: FastAPI request object
    :param response: FastAPI response object
    :return: streaming chat response
    """
    return await Mistral(request, response).chat(body)
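
For reference, a minimal sketch of how a client might exercise this endpoint once the backend is up. The host and port are assumptions (adjust to your deployment), and the response arrives as a server-sent event stream:

```python
# Minimal client sketch for the /v1/mistral/chat endpoint.
# BASE_URL is hypothetical; point it at your running OpenAOE backend.
import json

import requests

BASE_URL = "http://localhost:10099"  # assumed port; adjust to your deployment

payload = {
    "model": "mistral-7b",
    "prompt": "Why is the sky blue?",
    "messages": [],
    "stream": True,
}

# the endpoint streams server-sent events, so read the response incrementally
with requests.post(f"{BASE_URL}/v1/mistral/chat", json=payload, stream=True) as res:
    for line in res.iter_lines(decode_unicode=True):
        if line and line.startswith("data:"):
            event = json.loads(line[len("data:"):].strip())
            print(event.get("msg", ""), end="", flush=True)
```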
20 changes: 20 additions & 0 deletions openaoe/backend/config/config-template.yaml
@@ -77,4 +77,24 @@ models:
      app_id:
      ak:
      sk:
  mistral-7b:
    provider: mistral
    webui:
      avatar: 'https://oss.openmmlab.com/frontend/OpenAOE/mistral.webp'
      isStream: true
      background: 'linear-gradient(#4848cf26 0%, #7498be 100%)'
      path: '/v1/mistral/v1/mistral/chat'
      payload:
        messages: [ ]
        model: mistral
        prompt: ""
        role_meta:
          user_name: "user"
          bot_name: "assistant"
        stream: true
    api:
      api_base: http://localhost:11434
      app_id:
      ak:
      sk:
...
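
Since pyyaml is already a project dependency, here is an illustrative sketch of reading this entry back. The file path and access pattern are assumptions, not the project's actual config loader (which lives in `biz_config`):

```python
# Illustrative only: read the mistral-7b entry from the template.
# The real project loads config through its own biz_config module;
# this sketch just shows the shape of the YAML above.
import yaml

with open("openaoe/backend/config/config-template.yaml") as f:
    cfg = yaml.safe_load(f)

mistral_cfg = cfg["models"]["mistral-7b"]
print(mistral_cfg["provider"])          # "mistral"
print(mistral_cfg["api"]["api_base"])   # "http://localhost:11434" (the ollama default port)
```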
1 change: 1 addition & 0 deletions openaoe/backend/config/constant.py
@@ -11,6 +11,7 @@
PROVIDER_XUNFEI = "spark"
PROVIDER_CLAUDE = "claude"
PROVIDER_INTERNLM = "internlm"
PROVIDER_MISTRAL = "mistral"

DEFAULT_TIMEOUT_SECONDS = 600

36 changes: 36 additions & 0 deletions openaoe/backend/model/mistral.py
@@ -0,0 +1,36 @@
"""
ref. to https://github.com/ollama/ollama/blob/main/docs/api.md
Parameters
model: (required) the model name
messages: the messages of the chat, this can be used to keep a chat memory
The message object has the following fields:
role: the role of the message, either system, user or assistant
content: the content of the message
images (optional): a list of images to include in the message (for multimodal models such as llava)
Advanced parameters (optional):
format: the format to return a response in. Currently the only accepted value is json
options: additional model parameters listed in the documentation for the Modelfile such as temperature
template: the prompt template to use (overrides what is defined in the Modelfile)
stream: if false the response will be returned as a single response object, rather than a stream of objects
keep_alive: controls how long the model will stay loaded into memory following the request (default: 5m)
"""

from typing import List, Optional, Literal, Dict
from pydantic import BaseModel


class Message(BaseModel):
role: Optional[Literal["user", "system", "assistant"]] = "user"
content: str
images: Optional[List[str]] = None # img in base64


class MistralChatBody(BaseModel):
model: str
messages: List[Message]
options: Optional[Dict] = {}
template: Optional[str] = None
stream: Optional[bool] = True
keep_alive: Optional[str] = '5m'
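
A short sketch of how these models are meant to be used; the message contents are illustrative:

```python
# Example: building a MistralChatBody and serializing it for the ollama /api/chat call.
from openaoe.backend.model.mistral import Message, MistralChatBody

body = MistralChatBody(
    model="mistral",
    messages=[
        Message(role="system", content="You are a helpful assistant."),
        Message(role="user", content="Summarize SSE in one sentence."),
    ],
)

# model_dump_json() produces the JSON payload ollama expects,
# including the stream=True and keep_alive='5m' defaults.
print(body.model_dump_json())
```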
25 changes: 25 additions & 0 deletions openaoe/backend/model/openaoe.py
@@ -0,0 +1,25 @@
from typing import Optional, List, Literal

from pydantic import BaseModel


class Context(BaseModel):
    send_type: str = 'assistant'    # note: both send_type and sender_type are defined; the backend reads sender_type
    sender_type: str = "assistant"
    text: str = ''


class RoleMeta(BaseModel):
    user_name: Optional[str] = 'user'
    bot_name: Optional[str] = 'assistant'


class AoeChatBody(BaseModel):
    """
    OpenAOE general request body
    """
    model: str
    prompt: str
    messages: Optional[List[Context]] = []
    role_meta: Optional[RoleMeta] = None
    type: Optional[Literal['text', 'json']] = 'json'
    stream: Optional[bool] = True
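
Likewise, a hedged sketch of building the general request body these routes accept; field values are illustrative:

```python
# Example: the general OpenAOE request body that /v1/mistral/chat accepts.
from openaoe.backend.model.openaoe import AoeChatBody, Context, RoleMeta

body = AoeChatBody(
    model="mistral-7b",
    prompt="What is SSE?",
    messages=[Context(sender_type="user", text="Hi there")],
    role_meta=RoleMeta(user_name="user", bot_name="assistant"),
)
print(body.model_dump_json())
```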
3 changes: 2 additions & 1 deletion openaoe/backend/requirements.txt
@@ -12,4 +12,5 @@ pyyaml==6.0.1
httpx==0.25.0
sse-starlette==1.8.2
anyio==3.7.1
jsonstreamer==1.3.8
jsonstreamer==1.3.8
twine==5.0.0
61 changes: 61 additions & 0 deletions openaoe/backend/service/mistral.py
@@ -0,0 +1,61 @@
import json

import requests
from fastapi import Request, Response
from sse_starlette import EventSourceResponse

from openaoe.backend.config.biz_config import get_base_url
from openaoe.backend.config.constant import PROVIDER_MISTRAL
from openaoe.backend.model.openaoe import AoeChatBody
from openaoe.backend.model.mistral import MistralChatBody, Message

from openaoe.backend.util.log import log
logger = log(__name__)


class Mistral:
    def __init__(self, request: Request, response: Response):
        self.request = request
        self.response = response

    async def chat(self, body: AoeChatBody):
        # map OpenAOE chat history onto ollama-style messages,
        # normalizing the frontend's 'bot' sender to 'assistant'
        msgs = []
        for msg in body.messages:
            m = Message(role=msg.sender_type if msg.sender_type != 'bot' else "assistant", content=msg.text)
            msgs.append(m)
        # append the current prompt as the latest user message
        msgs.append(Message(role='user', content=body.prompt))
        chat_url = get_base_url(PROVIDER_MISTRAL, body.model) + "/api/chat"
        chat_body = MistralChatBody(
            model="mistral",
            messages=msgs
        )
        return self.chat_response_streaming(chat_url, chat_body)

    def chat_response_streaming(self, chat_url: str, chat_body: MistralChatBody):
        async def do_response_streaming():
            try:
                # note: requests is synchronous and blocks the event loop;
                # a non-blocking client such as httpx (already in requirements)
                # would be a drop-in improvement here
                res = requests.post(chat_url, json=json.loads(chat_body.model_dump_json()), stream=True)
                res.raise_for_status()
                # ollama streams newline-delimited JSON, so iterate by line rather
                # than by fixed-size chunk to avoid splitting a JSON object in two
                for line in res.iter_lines(decode_unicode=True):
                    if not line:
                        continue
                    logger.info(f"chunk: {line}")
                    chunk_json = json.loads(line)
                    yield json.dumps({
                        "success": True,
                        "msg": chunk_json.get("message", {}).get("content", "")
                    }, ensure_ascii=False)
            except Exception as e:
                logger.error(f"{e}")
                yield json.dumps(
                    {
                        "success": False,
                        "msg": f"from backend: {e}"
                    }
                )

        return EventSourceResponse(do_response_streaming())
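
For context, each line ollama streams back from /api/chat is a standalone JSON object (per the API docs referenced in model/mistral.py), which is why the handler parses line by line. A minimal sketch of the expected chunk shape, with illustrative values:

```python
# Shape of a single streamed chunk from ollama's /api/chat (values illustrative):
import json

chunk = '{"model": "mistral", "message": {"role": "assistant", "content": "Hello"}, "done": false}'
parsed = json.loads(chunk)
# the handler above forwards only the incremental message content:
print(parsed.get("message", {}).get("content", ""))  # -> "Hello"
```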




