Skip to content

Commit

Permalink
fix: handle typescript json format when mistralai returns this
Browse files Browse the repository at this point in the history
  • Loading branch information
sneko committed Mar 1, 2024
1 parent 2e77660 commit 24f5073
Show file tree
Hide file tree
Showing 4 changed files with 54 additions and 1 deletion.
24 changes: 24 additions & 0 deletions package-lock.json

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

2 changes: 2 additions & 0 deletions package.json
Original file line number Diff line number Diff line change
Expand Up @@ -116,6 +116,7 @@
"i18next": "^22.4.5",
"i18next-browser-languagedetector": "^7.0.1",
"jsdom": "^21.1.1",
"jsonic": "^1.0.1",
"langchain": "^0.1.7",
"linkify-it": "^5.0.0",
"locks": "^0.2.2",
Expand Down Expand Up @@ -202,6 +203,7 @@
"@types/http-errors": "^2.0.3",
"@types/jest": "^29.4.0",
"@types/jsdom": "^21.1.1",
"@types/jsonic": "^0.3.3",
"@types/linkify-it": "^3.0.5",
"@types/locks": "^0.2.1",
"@types/lodash.debounce": "^4.0.7",
Expand Down
22 changes: 21 additions & 1 deletion src/features/llm-langchain.ts
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@ import { CronJob } from 'cron';
import { minutesToMilliseconds } from 'date-fns/minutesToMilliseconds';
import { subHours } from 'date-fns/subHours';
import fs from 'fs/promises';
import jsonic from 'jsonic';
import { LLMChain } from 'langchain/chains';
import { createStuffDocumentsChain } from 'langchain/chains/combine_documents';
import { createRetrievalChain } from 'langchain/chains/retrieval';
Expand All @@ -17,7 +18,12 @@ import mistralTokenizer from 'mistral-tokenizer-js';
import path from 'path';
import { z } from 'zod';

import { ChunkEventEmitter, LlmManager, extractFirstJsonCodeContentFromMarkdown } from '@etabli/src/features/llm';
import {
ChunkEventEmitter,
LlmManager,
extractFirstJsonCodeContentFromMarkdown,
extractFirstTypescriptCodeContentFromMarkdown,
} from '@etabli/src/features/llm';
import { gptInstances, gptSeed } from '@etabli/src/gpt';
import { DocumentInitiativeTemplateSchema, ResultSchema, ResultSchemaType } from '@etabli/src/gpt/template';
import { tokensReachTheLimitError } from '@etabli/src/models/entities/errors';
Expand Down Expand Up @@ -470,6 +476,20 @@ CONTEXT:

throw new Error(`the json code block is not present in the answer or the answer has been truncated while saying it's complete`);
}
} else if (answer.text.includes('```ts')) {
const typescriptCode = extractFirstTypescriptCodeContentFromMarkdown(answer.text);

if (!typescriptCode) {
console.log(answer.text);

throw new Error(`the typescript code block is not present in the answer or the answer has been truncated while saying it's complete`);
}

// That's the pattern MistralAI seems to always provide when returning TypeScript format
const jsonStringNotStrict = typescriptCode.replace('type ResultSchemaType =', '').trim();

// An object literal written in TypeScript is not strict JSON (unquoted property names, trailing commas...), so a lenient parser is used before serializing it back to strict JSON
jsonString = JSON.stringify(jsonic(jsonStringNotStrict));
}

if (!jsonString) {
Expand Down
7 changes: 7 additions & 0 deletions src/features/llm.ts
Original file line number Diff line number Diff line change
Expand Up @@ -34,6 +34,13 @@ export function extractFirstJsonCodeContentFromMarkdown(markdown: string): strin
return !!regexResult ? regexResult[1] : null;
}

/**
 * Extracts the content of the first fenced code block tagged `ts` found in a Markdown string.
 *
 * The opening fence must be three backticks immediately followed by `ts`, optionally followed by
 * spaces or tabs, then a line break. Both LF and CRLF line endings are accepted so an answer using
 * Windows-style line breaks is not reported as missing its code block.
 *
 * @param markdown - The Markdown text to search.
 * @returns The text between the opening and closing fences (without the surrounding line breaks),
 * or `null` when no complete `ts` fenced block with non-empty content is present.
 */
export function extractFirstTypescriptCodeContentFromMarkdown(markdown: string): string | null {
  // `[ \t]*` tolerates trailing whitespace after the language tag, `\r?\n` tolerates CRLF line endings,
  // and the lazy quantifier stops at the first closing fence so only the first block is returned
  const regex = /```ts[ \t]*\r?\n([\s\S]+?)\r?\n```/;
  const regexResult = regex.exec(markdown);

  return regexResult?.[1] ?? null;
}

export async function initLlmSystem() {
// Note: Prisma does not yet implement table locking, though it would help avoid requesting the LLM system while sensitive components of it are being replaced
// This race condition should remain rare, and having an error thrown should be fine since it is replayed on the next iteration
Expand Down

0 comments on commit 24f5073

Please sign in to comment.