[BFCL] Add New Model DeepSeek-V3 (#857)
This PR adds the new model `DeepSeek-V3` to the leaderboard.
Although the model is open-sourced on Hugging Face, its large size (685B parameters) makes local inference impractical, so we use its hosted API endpoint for inference instead.
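For reference, a minimal sketch of what a function-calling request against the hosted endpoint looks like. The `base_url`, the `DEEPSEEK_API_KEY` variable, and the `deepseek-chat` model name come from the handler added in this PR; the weather tool is a hypothetical example.

```python
import os

from openai import OpenAI

# The hosted endpoint is OpenAI-compatible, so the standard OpenAI client works.
client = OpenAI(base_url="https://api.deepseek.com", api_key=os.getenv("DEEPSEEK_API_KEY"))

# A hypothetical tool definition, for illustration only.
tools = [
    {
        "type": "function",
        "function": {
            "name": "get_weather",
            "description": "Get the current weather for a city.",
            "parameters": {
                "type": "object",
                "properties": {"city": {"type": "string"}},
                "required": ["city"],
            },
        },
    }
]

response = client.chat.completions.create(
    model="deepseek-chat",  # currently points to DeepSeek-V3
    messages=[{"role": "user", "content": "What's the weather in Berkeley?"}],
    tools=tools,
)
print(response.choices[0].message.tool_calls)
```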
HuanzhiMao authored Dec 29, 2024
1 parent 859e707 commit 0cea216
Showing 7 changed files with 69 additions and 0 deletions.
1 change: 1 addition & 0 deletions berkeley-function-call-leaderboard/.env.example
@@ -5,6 +5,7 @@ MISTRAL_API_KEY=
FIREWORKS_API_KEY=
ANTHROPIC_API_KEY=
NVIDIA_API_KEY=nvapi-XXXXXX
DEEPSEEK_API_KEY=sk-XXXXXX
YI_API_KEY=
COHERE_API_KEY=
GROK_API_KEY=xai-XXXXXX
1 change: 1 addition & 0 deletions berkeley-function-call-leaderboard/CHANGELOG.md
@@ -2,6 +2,7 @@

All notable changes to the Berkeley Function Calling Leaderboard will be documented in this file.

- [Dec 29, 2024] [#857](https://github.com/ShishirPatil/gorilla/pull/857): Add new model `DeepSeek-V3` to the leaderboard.
- [Dec 29, 2024] [#855](https://github.com/ShishirPatil/gorilla/pull/855): Add new model `mistralai/Ministral-8B-Instruct-2410` to the leaderboard.
- [Dec 22, 2024] [#838](https://github.com/ShishirPatil/gorilla/pull/838): Fix parameter type mismatch error in possible answers.
- Simple: 2 affected
1 change: 1 addition & 0 deletions berkeley-function-call-leaderboard/SUPPORTED_MODELS.md
@@ -10,6 +10,7 @@ Below is a comprehensive table of models supported for running leaderboard evalu
|Model | Type |
|---|---|
|gorilla-openfunctions-v2 | Function Calling|
|DeepSeek-V3 | Function Calling|
|claude-3-opus-20240229-FC | Function Calling |
|claude-3-opus-20240229 | Prompt |
|claude-3-5-sonnet-20241022-FC | Function Calling |
@@ -7,6 +7,12 @@
"Gorilla LLM",
"Apache 2.0",
],
"DeepSeek-V3": [
"DeepSeek-V3 (FC)",
"https://api-docs.deepseek.com/news/news1226",
"DeepSeek",
"DeepSeek License",
],
"o1-2024-12-17-FC": [
"o1-2024-12-17 (FC)",
"https://openai.com/o1/",
@@ -956,6 +962,7 @@
NO_COST_MODELS = list(local_inference_handler_map.keys())
# The following models will also have no cost, even though they are queried through the API.
NO_COST_MODELS += [
    "DeepSeek-V3",
    "Nexusflow-Raven-v2",
    "firefunction-v1-FC",
    "firefunction-v2-FC",
@@ -113,6 +113,7 @@

UNDERSCORE_TO_DOT = [
    # TODO: Use the model style to determine this, single source of truth
    "DeepSeek-V3",
    "o1-2024-12-17-FC",
    "gpt-4o-2024-11-20-FC",
    "gpt-4o-mini-2024-07-18-FC",
@@ -17,6 +17,7 @@
from bfcl.model_handler.proprietary_model.claude import ClaudeHandler
from bfcl.model_handler.proprietary_model.cohere import CohereHandler
from bfcl.model_handler.proprietary_model.databricks import DatabricksHandler
from bfcl.model_handler.proprietary_model.deepseek import DeepSeekAPIHandler
from bfcl.model_handler.proprietary_model.fireworks import FireworksHandler
from bfcl.model_handler.proprietary_model.functionary import FunctionaryHandler
from bfcl.model_handler.proprietary_model.gemini import GeminiHandler
@@ -36,6 +37,7 @@
# Inference through API calls
api_inference_handler_map = {
    "gorilla-openfunctions-v2": GorillaHandler,
    "DeepSeek-V3": DeepSeekAPIHandler,
    "o1-2024-12-17-FC": OpenAIHandler,
    "o1-2024-12-17": OpenAIHandler,
    # "o1-mini-2024-09-12-FC": OpenAIHandler, # o1-mini-2024-09-12 does not support function calling
@@ -0,0 +1,56 @@
import os
import time

from bfcl.model_handler.model_style import ModelStyle
from bfcl.model_handler.proprietary_model.openai import OpenAIHandler
from bfcl.model_handler.utils import retry_with_backoff
from openai import OpenAI, RateLimitError
from overrides import override


# DeepSeek API documentation: https://api-docs.deepseek.com/
class DeepSeekAPIHandler(OpenAIHandler):
    def __init__(self, model_name, temperature) -> None:
        super().__init__(model_name, temperature)
        self.model_style = ModelStyle.OpenAI
        self.is_fc_model = True
        self.client = OpenAI(
            base_url="https://api.deepseek.com", api_key=os.getenv("DEEPSEEK_API_KEY")
        )

    @retry_with_backoff(RateLimitError)
    def generate_with_backoff(self, **kwargs):
        """
        Per the DeepSeek API documentation
        (https://api-docs.deepseek.com/quick_start/rate_limit):

            DeepSeek API does NOT constrain user's rate limit. We will try our best
            to serve every request. But please note that when our servers are under
            high traffic pressure, you may receive 429 (Rate Limit Reached) or 503
            (Server Overloaded). When this happens, please wait for a while and retry.

        Backoff is therefore still useful for handling 429 and 503 errors.
        """
        start_time = time.time()
        api_response = self.client.chat.completions.create(**kwargs)
        end_time = time.time()

        return api_response, end_time - start_time

    @override
    def _query_FC(self, inference_data: dict):
        message: list[dict] = inference_data["message"]
        tools = inference_data["tools"]
        inference_data["inference_input_log"] = {"message": repr(message), "tools": tools}

        if len(tools) > 0:
            return self.generate_with_backoff(
                # The model name is always "deepseek-chat", per https://api-docs.deepseek.com/quick_start/pricing
                # Note: it currently points to `DeepSeek-V3`
                model="deepseek-chat",
                messages=message,
                tools=tools,
            )
        else:
            return self.generate_with_backoff(
                model="deepseek-chat",
                messages=message,
            )
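For context, `retry_with_backoff` comes from `bfcl.model_handler.utils`. Below is a minimal sketch of the general pattern such a decorator implements, with assumed retry counts and delays; it is an illustration, not the repository's actual implementation.

```python
import time
from functools import wraps


def retry_with_backoff(error_type, max_retries=5, base_delay=1.0):
    """Retry the wrapped call with exponential backoff when `error_type` is raised."""

    def decorator(func):
        @wraps(func)
        def wrapper(*args, **kwargs):
            delay = base_delay
            for attempt in range(max_retries):
                try:
                    return func(*args, **kwargs)
                except error_type:
                    if attempt == max_retries - 1:
                        raise  # give up after the final attempt
                    time.sleep(delay)
                    delay *= 2  # double the wait between attempts

        return wrapper

    return decorator
```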
