From ccbc340f6bd783bb54e90d9139ab0e41ea0c33c3 Mon Sep 17 00:00:00 2001 From: sigoden Date: Sat, 7 Dec 2024 18:51:38 +0800 Subject: [PATCH] refactor: several improvements (#1044) - update models.yaml - update azure openai api version - agent `run_instructions_fn` --- models.yaml | 90 ++++++++++++++++++++++++++++++++++---- src/client/azure_openai.rs | 4 +- src/config/agent.rs | 5 +-- 3 files changed, 85 insertions(+), 14 deletions(-) diff --git a/models.yaml b/models.yaml index 9d353e2a..c42c155d 100644 --- a/models.yaml +++ b/models.yaml @@ -114,7 +114,7 @@ output_price: 0 supports_vision: true supports_function_calling: true - - name: gemini-exp-1121 + - name: gemini-exp-1206 max_input_tokens: 32768 max_output_tokens: 8192 input_price: 0 @@ -351,6 +351,11 @@ # - https://console.groq.com/docs/api-reference#chat - platform: groq models: + - name: llama-3.3-70b-versatile + max_input_tokens: 128000 + input_price: 0 + output_price: 0 + supports_function_calling: true - name: llama-3.1-70b-versatile max_input_tokens: 128000 input_price: 0 @@ -399,8 +404,12 @@ - name: llama3.2-vision max_input_tokens: 128000 supports_vision: true - - name: gemma2 - max_input_tokens: 8192 + - name: llama3.3 + max_input_tokens: 128000 + supports_function_calling: true + - name: qwq + max_input_tokens: 32768 + supports_function_calling: true - name: qwen2.5 max_input_tokens: 128000 supports_function_calling: true @@ -409,6 +418,8 @@ supports_function_calling: true - name: deepseek-coder-v2 max_input_tokens: 32768 + - name: gemma2 + max_input_tokens: 8192 - name: nomic-embed-text type: embedding max_tokens_per_chunk: 8192 @@ -523,7 +534,6 @@ # Links: # - https://docs.aws.amazon.com/bedrock/latest/userguide/model-ids.html#model-ids-arns # - https://aws.amazon.com/bedrock/pricing/ -# - https://docs.aws.amazon.com/bedrock/latest/userguide/conversation-inference.html # - https://docs.aws.amazon.com/bedrock/latest/userguide/cross-region-inference-support.html - platform: bedrock models: @@ -624,6 +634,23 @@ require_max_tokens: true input_price: 0.1 output_price: 0.1 + - name: us.amazon.nova-pro-v1:0 + max_input_tokens: 300000 + max_output_tokens: 5120 + input_price: 0.8 + output_price: 3.2 + supports_vision: true + - name: us.amazon.nova-lite-v1:0 + max_input_tokens: 300000 + max_output_tokens: 5120 + input_price: 0.06 + output_price: 0.24 + supports_vision: true + - name: us.amazon.nova-micro-v1:0 + max_input_tokens: 128000 + max_output_tokens: 5120 + input_price: 0.035 + output_price: 0.14 - name: mistral.mistral-large-2407-v1:0 max_input_tokens: 128000 input_price: 2 @@ -667,6 +694,12 @@ # - https://developers.cloudflare.com/workers-ai/configuration/open-ai-compatibility/ - platform: cloudflare models: + - name: '@cf/meta/llama-3.3-70b-instruct-fp8-fast' + max_input_tokens: 6144 + max_output_tokens: 2048 + require_max_tokens: true + input_price: 0 + output_price: 0 - name: '@cf/meta/llama-3.1-70b-instruct' max_input_tokens: 6144 max_output_tokens: 2048 @@ -1044,9 +1077,15 @@ - name: ministral-3b max_input_tokens: 128000 supports_function_calling: true + - name: cohere-command-r-plus-08-2024 + max_input_tokens: 128000 + supports_function_calling: true - name: cohere-command-r-plus max_input_tokens: 128000 supports_function_calling: true + - name: cohere-command-r-08-2024 + max_input_tokens: 128000 + supports_function_calling: true - name: cohere-command-r max_input_tokens: 128000 supports_function_calling: true @@ -1079,6 +1118,10 @@ # - https://deepinfra.com/pricing - platform: deepinfra models: + - name: meta-llama/Llama-3.3-70B-Instruct + max_input_tokens: 128000 + input_price: 0.23 + output_price: 0.40 - name: meta-llama/Meta-Llama-3.1-405B-Instruct max_input_tokens: 32000 input_price: 1.79 @@ -1176,6 +1219,10 @@ # - https://fireworks.ai/pricing - platform: fireworks models: + - name: accounts/fireworks/models/llama-v3p3-70b-instruct + max_input_tokens: 131072 + input_price: 0.9 + output_price: 0.9 - name: accounts/fireworks/models/llama-v3p1-405b-instruct max_input_tokens: 131072 input_price: 3 @@ -1224,6 +1271,11 @@ max_input_tokens: 32768 input_price: 0.9 output_price: 0.9 + - name: accounts/fireworks/models/qwen2-vl-72b-instruct + max_input_tokens: 32768 + input_price: 0.9 + output_price: 0.9 + supports_vision: true - name: accounts/fireworks/models/phi-3-vision-128k-instruct max_input_tokens: 131072 input_price: 0.2 @@ -1385,6 +1437,10 @@ output_price: 1.25 supports_vision: true supports_function_calling: true + - name: meta-llama/llama-3.3-70b-instruct + max_input_tokens: 131072 + input_price: 0.88 + output_price: 0.88 - name: meta-llama/llama-3.1-405b-instruct max_input_tokens: 131072 input_price: 2.8 @@ -1550,6 +1606,23 @@ input_price: 5 output_price: 15 supports_vision: true + - name: amazon/nova-pro-v1 + max_input_tokens: 300000 + max_output_tokens: 5120 + input_price: 0.8 + output_price: 3.2 + supports_vision: true + - name: amazon/nova-lite-v1 + max_input_tokens: 300000 + max_output_tokens: 5120 + input_price: 0.06 + output_price: 0.24 + supports_vision: true + - name: amazon/nova-micro-v1 + max_input_tokens: 128000 + max_output_tokens: 5120 + input_price: 0.035 + output_price: 0.14 # Links # - https://cloud.siliconflow.cn/models @@ -1613,10 +1686,6 @@ input_price: 0.186 output_price: 0.186 supports_function_calling: true - - name: nvidia/Llama-3.1-Nemotron-70B-Instruct - max_input_tokens: 32768 - input_price: 0.578 - output_price: 0.578 - name: Tencent/Hunyuan-A52B-Instruct max_input_tokens: 32768 input_price: 2.94 @@ -1650,6 +1719,11 @@ # - https://www.together.ai/pricing - platform: together models: + - name: meta-llama/Llama-3.3-70B-Instruct-Turbo + max_input_tokens: 32768 + input_price: 0.88 + output_price: 0.88 + supports_function_calling: true - name: meta-llama/Meta-Llama-3.1-405B-Instruct-Turbo max_input_tokens: 32768 input_price: 3.5 diff --git a/src/client/azure_openai.rs b/src/client/azure_openai.rs index 7052b841..cd1d9e4f 100644 --- a/src/client/azure_openai.rs +++ b/src/client/azure_openai.rs @@ -51,7 +51,7 @@ fn prepare_chat_completions( let api_key = self_.get_api_key()?; let url = format!( - "{}/openai/deployments/{}/chat/completions?api-version=2024-02-01", + "{}/openai/deployments/{}/chat/completions?api-version=2024-10-21", &api_base, self_.model.name() ); @@ -70,7 +70,7 @@ fn prepare_embeddings(self_: &AzureOpenAIClient, data: &EmbeddingsData) -> Resul let api_key = self_.get_api_key()?; let url = format!( - "{}/openai/deployments/{}/embeddings?api-version=2024-02-01", + "{}/openai/deployments/{}/embeddings?api-version=2024-10-21", &api_base, self_.model.name() ); diff --git a/src/config/agent.rs b/src/config/agent.rs index c81ed119..6418efab 100644 --- a/src/config/agent.rs +++ b/src/config/agent.rs @@ -334,10 +334,7 @@ impl Agent { self.variable_envs(), )?; match value { - Some(v) => { - println!(); - Ok(v) - } + Some(v) => Ok(v), _ => bail!("No return value from '_instructions' function"), } }