diff --git a/models.yaml b/models.yaml
index 240d9ac1..353251cb 100644
--- a/models.yaml
+++ b/models.yaml
@@ -28,13 +28,6 @@
       output_price: 10
       supports_vision: true
       supports_function_calling: true
-    - name: gpt-4o-2024-05-13
-      max_input_tokens: 128000
-      max_output_tokens: 4096
-      input_price: 5
-      output_price: 15
-      supports_vision: true
-      supports_function_calling: true
     - name: chatgpt-4o-latest
       max_input_tokens: 128000
       max_output_tokens: 16384
@@ -114,6 +107,13 @@
       output_price: 0
       supports_vision: true
       supports_function_calling: true
+    - name: gemini-2.0-flash-exp
+      max_input_tokens: 1048576
+      max_output_tokens: 8192
+      input_price: 0
+      output_price: 0
+      supports_vision: true
+      supports_function_calling: true
     - name: gemini-exp-1206
       max_input_tokens: 32768
       max_output_tokens: 8192
@@ -215,11 +215,6 @@
       input_price: 0.1
       output_price: 0.1
       supports_function_calling: true
-    - name: ministral-3b-latest
-      max_input_tokens: 128000
-      input_price: 0.04
-      output_price: 0.04
-      supports_function_calling: true
     - name: open-mistral-nemo
       max_input_tokens: 128000
       input_price: 0.15
@@ -275,24 +270,17 @@
       input_price: 2.5
       output_price: 10
       supports_function_calling: true
-    - name: command-r-plus
-      max_input_tokens: 128000
-      max_output_tokens: 4096
-      input_price: 2.5
-      output_price: 10
-      supports_function_calling: true
     - name: command-r-08-2024
       max_input_tokens: 128000
       max_output_tokens: 4096
       input_price: 0.15
       output_price: 0.6
       supports_function_calling: true
-    - name: command-r
+    - name: command-r7b-12-2024
       max_input_tokens: 128000
       max_output_tokens: 4096
-      input_price: 0.15
-      output_price: 0.6
-      supports_function_calling: true
+      input_price: 0.0375
+      output_price: 0.15
     - name: embed-english-v3.0
       type: embedding
       input_price: 0.1
@@ -329,14 +317,25 @@
 
 # Links:
 # - https://docs.x.ai/docs#models
-# - https://cohere.com/pricing
 # - https://docs.x.ai/api/endpoints#chat-completions
 - platform: xai
   models:
+    - name: grok-2-1212
+      max_input_tokens: 131072
+      input_price: 2
+      output_price: 10
+      supports_function_calling: true
     - name: grok-beta
       max_input_tokens: 131072
       input_price: 5
       output_price: 15
+      supports_function_calling: true
+    - name: grok-2-vision-1212
+      max_input_tokens: 32768
+      input_price: 2
+      output_price: 10
+      supports_vision: true
+      supports_function_calling: true
     - name: grok-vision-beta
       max_input_tokens: 8192
       input_price: 5
@@ -392,14 +391,6 @@
       input_price: 0
       output_price: 0
       supports_vision: true
-    - name: llama-3.2-3b-preview
-      max_input_tokens: 128000
-      input_price: 0
-      output_price: 0
-    - name: llama-3.2-1b-preview
-      max_input_tokens: 128000
-      input_price: 0
-      output_price: 0
     - name: gemma2-9b-it
       max_input_tokens: 8192
       input_price: 0
@@ -432,8 +423,6 @@
     - name: qwen2.5-coder
       max_input_tokens: 32768
       supports_function_calling: true
-    - name: deepseek-coder-v2
-      max_input_tokens: 32768
     - name: gemma2
       max_input_tokens: 8192
     - name: nomic-embed-text
@@ -463,13 +452,6 @@
       output_price: 0.075
       supports_vision: true
       supports_function_calling: true
-    - name: gemini-1.0-pro-002
-      max_input_tokens: 24568
-      max_output_tokens: 8192
-      input_price: 0.125
-      output_price: 0.375
-      supports_function_calling: true
-      no_system_message: true
     - name: claude-3-5-sonnet-v2@20241022
       max_input_tokens: 200000
       max_output_tokens: 8192
@@ -518,7 +500,7 @@
       output_price: 1.25
       supports_vision: true
       supports_function_calling: true
-    - name: mistral-large@2407
+    - name: mistral-large-2411
       max_input_tokens: 128000
       input_price: 2
       output_price: 6
@@ -638,18 +620,6 @@
       output_price: 0.35
       supports_function_calling: true
       supports_vision: true
-    - name: us.meta.llama3-2-3b-instruct-v1:0
-      max_input_tokens: 128000
-      max_output_tokens: 2048
-      require_max_tokens: true
-      input_price: 0.15
-      output_price: 0.15
-    - name: us.meta.llama3-2-1b-instruct-v1:0
-      max_input_tokens: 128000
-      max_output_tokens: 2048
-      require_max_tokens: true
-      input_price: 0.1
-      output_price: 0.1
     - name: us.amazon.nova-pro-v1:0
       max_input_tokens: 300000
       max_output_tokens: 5120
@@ -734,18 +704,6 @@
       require_max_tokens: true
       input_price: 0
       output_price: 0
-    - name: '@cf/meta/llama-3.2-3b-instruct'
-      max_input_tokens: 6144
-      max_output_tokens: 2048
-      require_max_tokens: true
-      input_price: 0
-      output_price: 0
-    - name: '@cf/meta/llama-3.2-1b-instruct'
-      max_input_tokens: 6144
-      max_output_tokens: 2048
-      require_max_tokens: true
-      input_price: 0
-      output_price: 0
     - name: '@cf/baai/bge-large-en-v1.5'
       type: embedding
       input_price: 0
@@ -838,6 +796,12 @@
       max_input_tokens: 1000000
       input_price: 0.07
       output_price: 0.28
+    - name: qwq-32b-preview
+      max_input_tokens: 30720
+      max_output_tokens: 16384
+      input_price: 0.49
+      output_price: 0.98
+      supports_function_calling: true
     - name: qwen-vl-max-latest
       input_price: 2.8
       output_price: 2.8
@@ -921,12 +885,17 @@
       input_price: 0.56
       output_price: 1.12
       supports_function_calling: true
+    - name: hunyuan-turbo-vision
+      max_input_tokens: 6144
+      max_output_tokens: 2048
+      input_price: 11.2
+      output_price: 11.2
+      supports_vision: true
     - name: hunyuan-vision
       max_input_tokens: 6144
       max_output_tokens: 2048
       input_price: 2.52
       output_price: 2.52
-      supports_function_calling: true
       supports_vision: true
     - name: hunyuan-embedding
       type: embedding
@@ -1072,39 +1041,32 @@
       max_tokens_per_chunk: 8191
       default_chunk_size: 2000
       max_batch_size: 100
+    - name: llama-3.3-70b-instruct
+      max_input_tokens: 128000
     - name: meta-llama-3.1-405b-instruct
       max_input_tokens: 128000
     - name: meta-llama-3.1-70b-instruct
       max_input_tokens: 128000
     - name: meta-llama-3.1-8b-instruct
       max_input_tokens: 128000
-    - name: meta-llama-3.2-90b-vision-instruct
+    - name: llama-3.2-90b-vision-instruct
       max_input_tokens: 8192
       supports_vision: true
-    - name: meta-llama-3.2-11b-vision-instruct
+    - name: llama-3.2-11b-vision-instruct
       max_input_tokens: 8192
       supports_vision: true
-    - name: mistral-large-2407
+    - name: mistral-large-2411
       max_input_tokens: 128000
       supports_function_calling: true
     - name: mistral-nemo
       max_input_tokens: 128000
       supports_function_calling: true
-    - name: ministral-3b
-      max_input_tokens: 128000
-      supports_function_calling: true
     - name: cohere-command-r-plus-08-2024
       max_input_tokens: 128000
       supports_function_calling: true
-    - name: cohere-command-r-plus
-      max_input_tokens: 128000
-      supports_function_calling: true
     - name: cohere-command-r-08-2024
       max_input_tokens: 128000
       supports_function_calling: true
-    - name: cohere-command-r
-      max_input_tokens: 128000
-      supports_function_calling: true
     - name: cohere-embed-v3-english
       type: embedding
       max_tokens_per_chunk: 512
@@ -1161,14 +1123,6 @@
       max_input_tokens: 128000
       input_price: 0.055
       output_price: 0.055
-    - name: meta-llama/Llama-3.2-3B-Instruct
-      max_input_tokens: 128000
-      input_price: 0.03
-      output_price: 0.05
-    - name: meta-llama/Llama-3.2-1B-Instruct
-      max_input_tokens: 128000
-      input_price: 0.01
-      output_price: 0.02
     - name: mistralai/Mistral-Nemo-Instruct-2407
       max_input_tokens: 128000
       input_price: 0.13
@@ -1263,14 +1217,6 @@
       input_price: 0.2
       output_price: 0.2
       supports_vision: true
-    - name: accounts/fireworks/models/llama-v3p2-3b-instruct
-      max_input_tokens: 131072
-      input_price: 0.1
-      output_price: 0.1
-    - name: accounts/fireworks/models/llama-v3p2-1b-instruct
-      max_input_tokens: 131072
-      input_price: 0.1
-      output_price: 0.1
     - name: accounts/fireworks/models/gemma2-9b-it
       max_input_tokens: 8192
       input_price: 0.2
@@ -1348,12 +1294,6 @@
       output_price: 10
       supports_vision: true
       supports_function_calling: true
-    - name: openai/gpt-4o-2024-05-13
-      max_input_tokens: 128000
-      input_price: 5
-      output_price: 15
-      supports_vision: true
-      supports_function_calling: true
     - name: openai/chatgpt-4o-latest
       max_input_tokens: 128000
       input_price: 5
@@ -1481,14 +1421,6 @@
       input_price: 0.055
       output_price: 0.055
       supports_vision: true
-    - name: meta-llama/llama-3.2-3b-instruct
-      max_input_tokens: 131072
-      input_price: 0.03
-      output_price: 0.05
-    - name: meta-llama/llama-3.2-1b-instruct
-      max_input_tokens: 131072
-      input_price: 0.01
-      output_price: 0.02
     - name: mistralai/mistral-large
       max_input_tokens: 128000
       input_price: 2
@@ -1543,21 +1475,16 @@
       input_price: 2.5
       output_price: 10
       supports_function_calling: true
-    - name: cohere/command-r-plus
-      max_input_tokens: 128000
-      input_price: 2.5
-      output_price: 10
-      supports_function_calling: true
     - name: cohere/command-r-08-2024
       max_input_tokens: 128000
       input_price: 0.15
       output_price: 0.6
       supports_function_calling: true
-    - name: cohere/command-r
+    - name: cohere/command-r7b-12-2024
       max_input_tokens: 128000
-      input_price: 0.15
-      output_price: 0.6
-      supports_function_calling: true
+      max_output_tokens: 4096
+      input_price: 0.0375
+      output_price: 0.15
     - name: deepseek/deepseek-chat
       max_input_tokens: 32768
       input_price: 0.14
@@ -1613,10 +1540,22 @@
       input_price: 0.35
       output_price: 0.4
       supports_function_calling: true
+    - name: x-ai/grok-2-1212
+      max_input_tokens: 131072
+      input_price: 2
+      output_price: 10
+      supports_function_calling: true
     - name: x-ai/grok-beta
       max_input_tokens: 32768
       input_price: 5
       output_price: 15
+      supports_function_calling: true
+    - name: x-ai/grok-2-vision-1212
+      max_input_tokens: 32768
+      input_price: 2
+      output_price: 10
+      supports_vision: true
+      supports_function_calling: true
     - name: x-ai/grok-vision-beta
       max_input_tokens: 8192
       input_price: 5
@@ -1706,10 +1645,6 @@
       input_price: 0.186
       output_price: 0.186
       supports_function_calling: true
-    - name: Tencent/Hunyuan-A52B-Instruct
-      max_input_tokens: 32768
-      input_price: 2.94
-      output_price: 2.94
     - name: BAAI/bge-large-en-v1.5
       type: embedding
       input_price: 0
@@ -1734,8 +1669,7 @@
       input_price: 0
 
 # Links:
-# - https://docs.together.ai/docs/inference-models
-# - https://docs.together.ai/docs/embedding-models
+# - https://docs.together.ai/docs/serverless-models
 # - https://www.together.ai/pricing
 - platform: together
   models:
@@ -1769,10 +1703,6 @@
       input_price: 0.18
       output_price: 0.18
       supports_vision: true
-    - name: meta-llama/Llama-3.2-3B-Instruct-Turbo
-      max_input_tokens: 131072
-      input_price: 0.06
-      output_price: 0.06
     - name: google/gemma-2-27b-it
       max_input_tokens: 8192
       input_price: 0.8
@@ -1831,7 +1761,7 @@
       max_tokens_per_chunk: 8192
       default_chunk_size: 1500
       max_batch_size: 100
-    - name: jina-clip-v1
+    - name: jina-clip-v2
       type: embedding
       input_price: 0
       max_tokens_per_chunk: 8192
@@ -1852,6 +1782,13 @@
 # - https://docs.voyageai.com/reference/
 - platform: voyageai
   models:
+    - name: voyage-3-large
+      type: embedding
+      max_input_tokens: 120000
+      input_price: 0.18
+      max_tokens_per_chunk: 32000
+      default_chunk_size: 2000
+      max_batch_size: 128
     - name: voyage-3
       type: embedding
       max_input_tokens: 320000