fix(llm): the token limit can be different on embeddings compared to the model limit
sneko committed Mar 20, 2024
1 parent a646e90 commit 7ed697d
Showing 2 changed files with 18 additions and 2 deletions.
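In short: an embeddings endpoint can enforce a smaller context window than the chat model itself, so batching documents against `modelTokenLimit` could overrun the embeddings limit. A minimal sketch of the distinction this commit introduces (field names are taken from the diff below; the guard function is illustrative, not part of the commit):

```ts
// Sketch only: the two ceilings this commit separates.
interface GptSettings {
  modelTokenLimit: number; // context window of the chat/completion model
  embeddingsTokenLimit: number; // context window of the embeddings endpoint (may be smaller)
}

// A batch that fits the model limit can still overflow the embeddings limit,
// hence chunking must be checked against `embeddingsTokenLimit`.
function fitsForEmbedding(tokenCount: number, settings: GptSettings): boolean {
  return tokenCount < settings.embeddingsTokenLimit;
}
```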
9 changes: 7 additions & 2 deletions src/features/llm-langchain.ts
@@ -384,9 +384,12 @@ export class LangchainWithLocalVectorStoreLlmManager implements LlmManager {
       // Note: below we do `+1` to take into account the potentially added document
       const currentTokensFingerprintOfBatching = Math.max(documentsChunks[currentChunk].length - 1, 0);
 
-      if (documentTokens.length >= this.gptInstance.modelTokenLimit) {
+      if (documentTokens.length >= this.gptInstance.embeddingsTokenLimit) {
         throw new Error('an initiative document should not be huge and trigger the llm limit');
-      } else if (currentChunkTokensCounter + documentTokens.length + (currentTokensFingerprintOfBatching + 1) >= this.gptInstance.modelTokenLimit) {
+      } else if (
+        currentChunkTokensCounter + documentTokens.length + (currentTokensFingerprintOfBatching + 1) >=
+        this.gptInstance.embeddingsTokenLimit
+      ) {
         // If adding this document to the previous ones would exceed the token limit, use a new chunk
         currentChunk += 1;
         documentsChunks.push([]);
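For context, here is a self-contained sketch of the chunk-packing logic above, with a crude whitespace counter standing in for the project's tiktoken-based one (an assumption, not the actual counter):

```ts
// Crude stand-in for the project's tiktoken-based counter (assumption).
const countTokens = (text: string): number => text.split(/\s+/).filter((t) => t !== '').length;

function packDocuments(documents: string[], embeddingsTokenLimit: number): string[][] {
  const documentsChunks: string[][] = [[]];
  let currentChunk = 0;
  let currentChunkTokensCounter = 0;

  for (const document of documents) {
    const documentTokensLength = countTokens(document);
    // `+1` accounts for the separator added by the potentially appended document (see the diff comment)
    const currentTokensFingerprintOfBatching = Math.max(documentsChunks[currentChunk].length - 1, 0);

    if (documentTokensLength >= embeddingsTokenLimit) {
      throw new Error('a single document must not exceed the embeddings token limit');
    } else if (currentChunkTokensCounter + documentTokensLength + (currentTokensFingerprintOfBatching + 1) >= embeddingsTokenLimit) {
      // Start a new chunk rather than overflowing the embeddings limit
      currentChunk += 1;
      documentsChunks.push([]);
      currentChunkTokensCounter = 0;
    }

    documentsChunks[currentChunk].push(document);
    currentChunkTokensCounter += documentTokensLength;
  }

  return documentsChunks;
}
```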
@@ -474,6 +477,7 @@ CONTEXTE :

       // To help the LLM we give inside the context the tools we are looking for
       // Since we cannot give the 8k+ tools from our database, we try to provide a meaningful subset based on the extracted tech references we retrieved
+      // Note: we did not check the `embeddingsTokenLimit` since it has never been reached; if needed, take example from the documents computation that prepares chunks
       const rawToolsVectors = await this.toolsVectorStore.embeddings.embedDocuments(rawToolsFromAnalysis.filter((item) => item.trim() !== ''));
 
       const contextTools: string[] = [];
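As an aside, selecting that subset typically means comparing the raw references' vectors to the known tools' vectors. A rough sketch under that assumption (`cosineSimilarity` and the data shapes here are illustrative, not the project's actual helpers):

```ts
// Illustrative helper: cosine similarity between two vectors of equal length.
function cosineSimilarity(a: number[], b: number[]): number {
  let dot = 0;
  let normA = 0;
  let normB = 0;
  for (let i = 0; i < a.length; i++) {
    dot += a[i] * b[i];
    normA += a[i] * a[i];
    normB += b[i] * b[i];
  }
  return dot / (Math.sqrt(normA) * Math.sqrt(normB));
}

// For each raw tech reference vector, keep the closest known tool (all names assumed).
function closestTools(rawToolsVectors: number[][], toolVectors: Map<string, number[]>): string[] {
  const contextTools: string[] = [];
  for (const vector of rawToolsVectors) {
    let bestTool = '';
    let bestScore = -Infinity;
    for (const [tool, toolVector] of toolVectors) {
      const score = cosineSimilarity(vector, toolVector);
      if (score > bestScore) {
        bestScore = score;
        bestTool = tool;
      }
    }
    if (bestTool !== '') {
      contextTools.push(bestTool);
    }
  }
  return contextTools;
}
```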
@@ -675,6 +679,7 @@ CONTEXTE :
   }
 
   public truncateContentBasedOnTokens(content: string, maximumTokens: number): string {
+    // Note: the token limit used here is the model's, not the embeddings' one (adjust if needed)
     if (maximumTokens > this.gptInstance.modelTokenLimit) {
       console.warn(
         `the truncation ceiling specified (${maximumTokens}) is above the llm limit of ${this.gptInstance.modelTokenLimit} tokens, so defaulting to the latter`
11 changes: 11 additions & 0 deletions src/gpt/index.ts
@@ -4,6 +4,7 @@ export interface GptSettings {
   model: string;
   countModel: TiktokenModel; // The counter does not understand precise GPT versions
   modelTokenLimit: number; // Precise token maximums can be found on https://www.scriptbyai.com/token-limit-openai-chatgpt/
+  embeddingsTokenLimit: number; // We didn't find a definitive list, but `16385` is a good enough default; adjust according to the provider if needed
   per1000TokensCost: number; // This is about input tokens (since our outputs should be small, we don't consider them here)
 }

@@ -26,63 +27,73 @@ export const gptInstances: Record<GptInstance, GptSettings> = {
     model: 'gpt-3.5-turbo-1106',
     countModel: 'gpt-3.5-turbo',
     modelTokenLimit: 16385,
+    embeddingsTokenLimit: 16385,
     per1000TokensCost: 0.001,
   },
   v4: {
     model: 'gpt-4-1106-preview',
     countModel: 'gpt-4',
     modelTokenLimit: 16385,
+    embeddingsTokenLimit: 16385,
     per1000TokensCost: 0.01,
   },
   // MistralAI
   deprecatedMistralTiny: {
     model: 'mistral-tiny', // mistral7b
     countModel: 'gpt-4',
     modelTokenLimit: 16385,
+    embeddingsTokenLimit: 16385,
     per1000TokensCost: 0.00014,
   },
   deprecatedMistralSmall: {
     model: 'mistral-small', // mixtral8x7b
     countModel: 'gpt-4',
     modelTokenLimit: 16385,
+    embeddingsTokenLimit: 16385,
     per1000TokensCost: 0.0006,
   },
   deprecatedMistralMedium: {
     model: 'mistral-medium', // ...
     countModel: 'gpt-4',
     modelTokenLimit: 16385,
+    embeddingsTokenLimit: 16385,
     per1000TokensCost: 0.0025,
   },
   mistral7b: {
     // New version of `tiny`, a bit more expensive, with more token capacity
     model: 'open-mistral-7b', // mistral7b
     countModel: 'gpt-4',
     modelTokenLimit: 32768,
+    embeddingsTokenLimit: 16385,
     per1000TokensCost: 0.0002,
   },
   mistral8x7b: {
     // New version of `small`, a bit more expensive, with more token capacity
     model: 'open-mixtral-8x7b', // mixtral8x7b
     countModel: 'gpt-4',
     modelTokenLimit: 32768,
+    embeddingsTokenLimit: 16385,
     per1000TokensCost: 0.00065,
   },
   mistralSmall: {
     model: 'mistral-small-latest',
     countModel: 'gpt-4',
     modelTokenLimit: 32768,
+    embeddingsTokenLimit: 16385,
     per1000TokensCost: 0.0055,
   },
   mistralMedium: {
     model: 'mistral-medium-latest',
     countModel: 'gpt-4',
     modelTokenLimit: 32768,
+    embeddingsTokenLimit: 16385,
     per1000TokensCost: 0.0075,
   },
   mistralLarge: {
     model: 'mistral-large-latest',
     countModel: 'gpt-4',
     modelTokenLimit: 32768,
+    embeddingsTokenLimit: 16385,
     per1000TokensCost: 0.022,
   },
 };
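For illustration, a hedged usage example of these settings (the `gptInstances` lookup comes from the diff; the import path and the token count are made up):

```ts
import { gptInstances } from './src/gpt'; // hypothetical import path

const settings = gptInstances.mistralLarge;

// Input-only cost estimate, matching the `per1000TokensCost` comment above
const promptTokens = 12000;
const estimatedCost = (promptTokens / 1000) * settings.per1000TokensCost; // 12 * 0.022 = 0.264

// Batch against the embeddings ceiling, not the (larger) model context window
const fitsEmbeddings = promptTokens < settings.embeddingsTokenLimit; // 12000 < 16385 -> true
const fitsModel = promptTokens < settings.modelTokenLimit; // 12000 < 32768 -> true
```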
