From ccbc340f6bd783bb54e90d9139ab0e41ea0c33c3 Mon Sep 17 00:00:00 2001
From: sigoden <sigoden@gmail.com>
Date: Sat, 7 Dec 2024 18:51:38 +0800
Subject: [PATCH] refactor: several improvements (#1044)

- update models.yaml
- update azure openai api version from 2024-02-01 to 2024-10-21
- agent `run_instructions_fn`: stop printing a blank line before returning the value
---
 models.yaml                | 90 ++++++++++++++++++++++++++++++++++----
 src/client/azure_openai.rs |  4 +-
 src/config/agent.rs        |  5 +--
 3 files changed, 85 insertions(+), 14 deletions(-)

diff --git a/models.yaml b/models.yaml
index 9d353e2a..c42c155d 100644
--- a/models.yaml
+++ b/models.yaml
@@ -114,7 +114,7 @@
       output_price: 0
       supports_vision: true
       supports_function_calling: true
-    - name: gemini-exp-1121
+    - name: gemini-exp-1206
       max_input_tokens: 32768
       max_output_tokens: 8192
       input_price: 0
@@ -351,6 +351,11 @@
 #  - https://console.groq.com/docs/api-reference#chat
 - platform: groq
   models:
+    - name: llama-3.3-70b-versatile
+      max_input_tokens: 128000
+      input_price: 0
+      output_price: 0
+      supports_function_calling: true
     - name: llama-3.1-70b-versatile
       max_input_tokens: 128000
       input_price: 0
@@ -399,8 +404,12 @@
     - name: llama3.2-vision
       max_input_tokens: 128000
       supports_vision: true
-    - name: gemma2
-      max_input_tokens: 8192
+    - name: llama3.3
+      max_input_tokens: 128000
+      supports_function_calling: true
+    - name: qwq
+      max_input_tokens: 32768
+      supports_function_calling: true
     - name: qwen2.5
       max_input_tokens: 128000
       supports_function_calling: true
@@ -409,6 +418,8 @@
       supports_function_calling: true
     - name: deepseek-coder-v2
       max_input_tokens: 32768
+    - name: gemma2
+      max_input_tokens: 8192
     - name: nomic-embed-text
       type: embedding
       max_tokens_per_chunk: 8192
@@ -523,7 +534,6 @@
 # Links:
 #  - https://docs.aws.amazon.com/bedrock/latest/userguide/model-ids.html#model-ids-arns
 #  - https://aws.amazon.com/bedrock/pricing/
-#  - https://docs.aws.amazon.com/bedrock/latest/userguide/conversation-inference.html
 #  - https://docs.aws.amazon.com/bedrock/latest/userguide/cross-region-inference-support.html
 - platform: bedrock
   models:
@@ -624,6 +634,23 @@
       require_max_tokens: true
       input_price: 0.1
       output_price: 0.1
+    - name: us.amazon.nova-pro-v1:0
+      max_input_tokens: 300000
+      max_output_tokens: 5120
+      input_price: 0.8
+      output_price: 3.2
+      supports_vision: true
+    - name: us.amazon.nova-lite-v1:0
+      max_input_tokens: 300000
+      max_output_tokens: 5120
+      input_price: 0.06
+      output_price: 0.24
+      supports_vision: true
+    - name: us.amazon.nova-micro-v1:0
+      max_input_tokens: 128000
+      max_output_tokens: 5120
+      input_price: 0.035
+      output_price: 0.14
     - name: mistral.mistral-large-2407-v1:0
       max_input_tokens: 128000
       input_price: 2
@@ -667,6 +694,12 @@
 #  - https://developers.cloudflare.com/workers-ai/configuration/open-ai-compatibility/
 - platform: cloudflare
   models:
+    - name: '@cf/meta/llama-3.3-70b-instruct-fp8-fast'
+      max_input_tokens: 6144
+      max_output_tokens: 2048
+      require_max_tokens: true
+      input_price: 0
+      output_price: 0
     - name: '@cf/meta/llama-3.1-70b-instruct'
       max_input_tokens: 6144
       max_output_tokens: 2048
@@ -1044,9 +1077,15 @@
     - name: ministral-3b
       max_input_tokens: 128000
       supports_function_calling: true
+    - name: cohere-command-r-plus-08-2024
+      max_input_tokens: 128000
+      supports_function_calling: true
     - name: cohere-command-r-plus
       max_input_tokens: 128000
       supports_function_calling: true
+    - name: cohere-command-r-08-2024
+      max_input_tokens: 128000
+      supports_function_calling: true
     - name: cohere-command-r
       max_input_tokens: 128000
       supports_function_calling: true
@@ -1079,6 +1118,10 @@
 #  - https://deepinfra.com/pricing
 - platform: deepinfra
   models:
+    - name: meta-llama/Llama-3.3-70B-Instruct
+      max_input_tokens: 128000
+      input_price: 0.23
+      output_price: 0.40
     - name: meta-llama/Meta-Llama-3.1-405B-Instruct
       max_input_tokens: 32000
       input_price: 1.79
@@ -1176,6 +1219,10 @@
 #  - https://fireworks.ai/pricing
 - platform: fireworks
   models:
+    - name: accounts/fireworks/models/llama-v3p3-70b-instruct
+      max_input_tokens: 131072
+      input_price: 0.9
+      output_price: 0.9
     - name: accounts/fireworks/models/llama-v3p1-405b-instruct
       max_input_tokens: 131072
       input_price: 3
@@ -1224,6 +1271,11 @@
       max_input_tokens: 32768
       input_price: 0.9
       output_price: 0.9
+    - name: accounts/fireworks/models/qwen2-vl-72b-instruct
+      max_input_tokens: 32768
+      input_price: 0.9
+      output_price: 0.9
+      supports_vision: true
     - name: accounts/fireworks/models/phi-3-vision-128k-instruct
       max_input_tokens: 131072
       input_price: 0.2
@@ -1385,6 +1437,10 @@
       output_price: 1.25
       supports_vision: true
       supports_function_calling: true
+    - name: meta-llama/llama-3.3-70b-instruct
+      max_input_tokens: 131072
+      input_price: 0.88
+      output_price: 0.88
     - name: meta-llama/llama-3.1-405b-instruct
       max_input_tokens: 131072
       input_price: 2.8
@@ -1550,6 +1606,23 @@
       input_price: 5
       output_price: 15
       supports_vision: true
+    - name: amazon/nova-pro-v1
+      max_input_tokens: 300000
+      max_output_tokens: 5120
+      input_price: 0.8
+      output_price: 3.2
+      supports_vision: true
+    - name: amazon/nova-lite-v1
+      max_input_tokens: 300000
+      max_output_tokens: 5120
+      input_price: 0.06
+      output_price: 0.24
+      supports_vision: true
+    - name: amazon/nova-micro-v1
+      max_input_tokens: 128000
+      max_output_tokens: 5120
+      input_price: 0.035
+      output_price: 0.14
 
 # Links
 #  - https://cloud.siliconflow.cn/models
@@ -1613,10 +1686,6 @@
       input_price: 0.186
       output_price: 0.186
       supports_function_calling: true
-    - name: nvidia/Llama-3.1-Nemotron-70B-Instruct
-      max_input_tokens: 32768
-      input_price: 0.578
-      output_price: 0.578
     - name: Tencent/Hunyuan-A52B-Instruct
       max_input_tokens: 32768
       input_price: 2.94
@@ -1650,6 +1719,11 @@
 #  - https://www.together.ai/pricing
 - platform: together
   models:
+    - name: meta-llama/Llama-3.3-70B-Instruct-Turbo
+      max_input_tokens: 32768
+      input_price: 0.88
+      output_price: 0.88
+      supports_function_calling: true
     - name: meta-llama/Meta-Llama-3.1-405B-Instruct-Turbo
       max_input_tokens: 32768
       input_price: 3.5
diff --git a/src/client/azure_openai.rs b/src/client/azure_openai.rs
index 7052b841..cd1d9e4f 100644
--- a/src/client/azure_openai.rs
+++ b/src/client/azure_openai.rs
@@ -51,7 +51,7 @@ fn prepare_chat_completions(
     let api_key = self_.get_api_key()?;
 
     let url = format!(
-        "{}/openai/deployments/{}/chat/completions?api-version=2024-02-01",
+        "{}/openai/deployments/{}/chat/completions?api-version=2024-10-21",
         &api_base,
         self_.model.name()
     );
@@ -70,7 +70,7 @@ fn prepare_embeddings(self_: &AzureOpenAIClient, data: &EmbeddingsData) -> Resul
     let api_key = self_.get_api_key()?;
 
     let url = format!(
-        "{}/openai/deployments/{}/embeddings?api-version=2024-02-01",
+        "{}/openai/deployments/{}/embeddings?api-version=2024-10-21",
         &api_base,
         self_.model.name()
     );
diff --git a/src/config/agent.rs b/src/config/agent.rs
index c81ed119..6418efab 100644
--- a/src/config/agent.rs
+++ b/src/config/agent.rs
@@ -334,10 +334,7 @@ impl Agent {
             self.variable_envs(),
         )?;
         match value {
-            Some(v) => {
-                println!();
-                Ok(v)
-            }
+            Some(v) => Ok(v),
             _ => bail!("No return value from '_instructions' function"),
         }
     }