fix(llm): the token limit can be different on embeddings compared to the model limit
sneko committed Mar 20, 2024
1 parent a646e90 commit 7ed697d
Showing 2 changed files with 18 additions and 2 deletions.
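In short: an embeddings endpoint can enforce a smaller context window than the chat model itself, so batching documents against `modelTokenLimit` could overrun the embeddings limit. A minimal sketch of the distinction this commit introduces (field names are taken from the diff below; the guard function is illustrative, not part of the commit):

```ts
// Sketch only: the two ceilings this commit separates.
interface GptSettings {
  modelTokenLimit: number; // context window of the chat/completion model
  embeddingsTokenLimit: number; // context window of the embeddings endpoint (may be smaller)
}

// A batch that fits the model limit can still overflow the embeddings limit,
// hence chunking must be checked against `embeddingsTokenLimit`.
function fitsForEmbedding(tokenCount: number, settings: GptSettings): boolean {
  return tokenCount < settings.embeddingsTokenLimit;
}
```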
9 changes: 7 additions & 2 deletions src/features/llm-langchain.ts
@@ -384,9 +384,12 @@ export class LangchainWithLocalVectorStoreLlmManager implements LlmManager {
       // Note: below we do `+1` to take into account the potentially added document
       const currentTokensFingerprintOfBatching = Math.max(documentsChunks[currentChunk].length - 1, 0);
 
-      if (documentTokens.length >= this.gptInstance.modelTokenLimit) {
+      if (documentTokens.length >= this.gptInstance.embeddingsTokenLimit) {
         throw new Error('an initiative document should not be huge and trigger the llm limit');
-      } else if (currentChunkTokensCounter + documentTokens.length + (currentTokensFingerprintOfBatching + 1) >= this.gptInstance.modelTokenLimit) {
+      } else if (
+        currentChunkTokensCounter + documentTokens.length + (currentTokensFingerprintOfBatching + 1) >=
+        this.gptInstance.embeddingsTokenLimit
+      ) {
         // If adding this document to the previous ones would exceed the token limit, use a new chunk
         currentChunk += 1;
         documentsChunks.push([]);
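For context, here is a self-contained sketch of the chunk-packing logic above, with a crude whitespace counter standing in for the project's tiktoken-based one (an assumption, not the actual counter):

```ts
// Crude stand-in for the project's tiktoken-based counter (assumption).
const countTokens = (text: string): number => text.split(/\s+/).filter((t) => t !== '').length;

function packDocuments(documents: string[], embeddingsTokenLimit: number): string[][] {
  const documentsChunks: string[][] = [[]];
  let currentChunk = 0;
  let currentChunkTokensCounter = 0;

  for (const document of documents) {
    const documentTokensLength = countTokens(document);
    // `+1` accounts for the separator added by the potentially appended document (see the diff comment)
    const currentTokensFingerprintOfBatching = Math.max(documentsChunks[currentChunk].length - 1, 0);

    if (documentTokensLength >= embeddingsTokenLimit) {
      throw new Error('a single document must not exceed the embeddings token limit');
    } else if (currentChunkTokensCounter + documentTokensLength + (currentTokensFingerprintOfBatching + 1) >= embeddingsTokenLimit) {
      // Start a new chunk rather than overflowing the embeddings limit
      currentChunk += 1;
      documentsChunks.push([]);
      currentChunkTokensCounter = 0;
    }

    documentsChunks[currentChunk].push(document);
    currentChunkTokensCounter += documentTokensLength;
  }

  return documentsChunks;
}
```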
@@ -474,6 +477,7 @@ CONTEXTE :

       // To help the LLM we give inside the context the tools we are looking for
       // Since we cannot give the 8k+ tools from our database, we try to provide a meaningful subset based on the extracted tech references we retrieved
+      // Note: we did not check the `embeddingsTokenLimit` since it has never been reached; if needed, take example from the documents computation that prepares chunks
       const rawToolsVectors = await this.toolsVectorStore.embeddings.embedDocuments(rawToolsFromAnalysis.filter((item) => item.trim() !== ''));
 
       const contextTools: string[] = [];
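As an aside, selecting that subset typically means comparing the raw references' vectors to the known tools' vectors. A rough sketch under that assumption (`cosineSimilarity` and the data shapes here are illustrative, not the project's actual helpers):

```ts
// Illustrative helper: cosine similarity between two vectors of equal length.
function cosineSimilarity(a: number[], b: number[]): number {
  let dot = 0;
  let normA = 0;
  let normB = 0;
  for (let i = 0; i < a.length; i++) {
    dot += a[i] * b[i];
    normA += a[i] * a[i];
    normB += b[i] * b[i];
  }
  return dot / (Math.sqrt(normA) * Math.sqrt(normB));
}

// For each raw tech reference vector, keep the closest known tool (all names assumed).
function closestTools(rawToolsVectors: number[][], toolVectors: Map<string, number[]>): string[] {
  const contextTools: string[] = [];
  for (const vector of rawToolsVectors) {
    let bestTool = '';
    let bestScore = -Infinity;
    for (const [tool, toolVector] of toolVectors) {
      const score = cosineSimilarity(vector, toolVector);
      if (score > bestScore) {
        bestScore = score;
        bestTool = tool;
      }
    }
    if (bestTool !== '') {
      contextTools.push(bestTool);
    }
  }
  return contextTools;
}
```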
@@ -675,6 +679,7 @@ CONTEXTE :
   }
 
   public truncateContentBasedOnTokens(content: string, maximumTokens: number): string {
+    // Note: the token limit used here is the model's, not the embeddings' one (adjust if needed)
     if (maximumTokens > this.gptInstance.modelTokenLimit) {
       console.warn(
         `the truncation ceiling specified (${maximumTokens}) is above the llm limit of ${this.gptInstance.modelTokenLimit} tokens, so defaulting to the latter`
11 changes: 11 additions & 0 deletions src/gpt/index.ts
@@ -4,6 +4,7 @@ export interface GptSettings {
   model: string;
   countModel: TiktokenModel; // The counter does not understand precise GPT versions
   modelTokenLimit: number; // Precise token maximums can be found on https://www.scriptbyai.com/token-limit-openai-chatgpt/
+  embeddingsTokenLimit: number; // We didn't find a definitive list, but `16385` is a good enough default; adjust according to the provider if needed
   per1000TokensCost: number; // This is about input tokens (since our outputs should be small, we don't consider them here)
 }

@@ -26,63 +27,73 @@ export const gptInstances: Record<GptInstance, GptSettings> = {
     model: 'gpt-3.5-turbo-1106',
     countModel: 'gpt-3.5-turbo',
     modelTokenLimit: 16385,
+    embeddingsTokenLimit: 16385,
     per1000TokensCost: 0.001,
   },
   v4: {
     model: 'gpt-4-1106-preview',
     countModel: 'gpt-4',
     modelTokenLimit: 16385,
+    embeddingsTokenLimit: 16385,
     per1000TokensCost: 0.01,
   },
   // MistralAI
   deprecatedMistralTiny: {
     model: 'mistral-tiny', // mistral7b
     countModel: 'gpt-4',
     modelTokenLimit: 16385,
+    embeddingsTokenLimit: 16385,
     per1000TokensCost: 0.00014,
   },
   deprecatedMistralSmall: {
     model: 'mistral-small', // mixtral8x7b
     countModel: 'gpt-4',
     modelTokenLimit: 16385,
+    embeddingsTokenLimit: 16385,
     per1000TokensCost: 0.0006,
   },
   deprecatedMistralMedium: {
     model: 'mistral-medium', // ...
     countModel: 'gpt-4',
     modelTokenLimit: 16385,
+    embeddingsTokenLimit: 16385,
     per1000TokensCost: 0.0025,
   },
   mistral7b: {
     // New version of `tiny`, a bit more expensive, with more token capacity
     model: 'open-mistral-7b', // mistral7b
     countModel: 'gpt-4',
     modelTokenLimit: 32768,
+    embeddingsTokenLimit: 16385,
     per1000TokensCost: 0.0002,
   },
   mistral8x7b: {
     // New version of `small`, a bit more expensive, with more token capacity
     model: 'open-mixtral-8x7b', // mixtral8x7b
     countModel: 'gpt-4',
     modelTokenLimit: 32768,
+    embeddingsTokenLimit: 16385,
     per1000TokensCost: 0.00065,
   },
   mistralSmall: {
     model: 'mistral-small-latest',
     countModel: 'gpt-4',
     modelTokenLimit: 32768,
+    embeddingsTokenLimit: 16385,
     per1000TokensCost: 0.0055,
   },
   mistralMedium: {
     model: 'mistral-medium-latest',
     countModel: 'gpt-4',
     modelTokenLimit: 32768,
+    embeddingsTokenLimit: 16385,
     per1000TokensCost: 0.0075,
   },
   mistralLarge: {
     model: 'mistral-large-latest',
     countModel: 'gpt-4',
     modelTokenLimit: 32768,
+    embeddingsTokenLimit: 16385,
     per1000TokensCost: 0.022,
   },
 };
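For illustration, a hedged usage example of these settings (the `gptInstances` lookup comes from the diff; the import path and the token count are made up):

```ts
import { gptInstances } from './src/gpt'; // hypothetical import path

const settings = gptInstances.mistralLarge;

// Input-only cost estimate, matching the `per1000TokensCost` comment above
const promptTokens = 12000;
const estimatedCost = (promptTokens / 1000) * settings.per1000TokensCost; // 12 * 0.022 = 0.264

// Batch against the embeddings ceiling, not the (larger) model context window
const fitsEmbeddings = promptTokens < settings.embeddingsTokenLimit; // 12000 < 16385 -> true
const fitsModel = promptTokens < settings.modelTokenLimit; // 12000 < 32768 -> true
```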
